Spaces:

SolusOps
/

AML_env

Sleeping

App Files Files Community

DataBoySu committed on Apr 12

Commit

97fbf33

1 Parent(s): 81e1efb

deployment without ui

Browse files

Files changed (6) hide show

.dockerignore +5 -1
Dockerfile +62 -32
README.md +85 -49
client.py +45 -42
graders/__init__.py +2 -0
inference.py +5 -5

.dockerignore CHANGED Viewed

@@ -45,4 +45,8 @@ pre-val.sh
 # ── Misc ──────────────────────────────────────────────────────────────────────
 test_redirect.py
 openenv_AML_env.egg-info/
-openenv_tracefix_rl.egg-info/

 # ── Misc ──────────────────────────────────────────────────────────────────────
 test_redirect.py
 openenv_AML_env.egg-info/
+openenv_tracefix_rl.egg-info/
+.venv/
+__pycache__/
+*.pyc
+.git/

Dockerfile CHANGED Viewed

@@ -1,3 +1,9 @@
 # ============================================================
 # AML Investigator — OpenEnv Environment
 # Hugging Face Spaces compliant Docker image
@@ -10,55 +16,79 @@
 #   docker run -p 7860:7860 aml-env
 # ============================================================
-FROM python:3.11-slim
-# --- System dependencies -------------------------------------------------
-# curl  → healthcheck
-# git   → uv may resolve VCS dependencies (openenv from git)
 RUN apt-get update && \
-    apt-get install -y --no-install-recommends curl git && \
     rm -rf /var/lib/apt/lists/*
-# --- Install uv ----------------------------------------------------------
-# uv is the canonical package manager for this project (see uv.lock).
-# We download the pre-built binary so Docker layer caching is fast.
-RUN curl -LsSf https://astral.sh/uv/install.sh | sh && \
-    mv /root/.local/bin/uv /usr/local/bin/uv && \
-    mv /root/.local/bin/uvx /usr/local/bin/uvx
-# --- Working directory ---------------------------------------------------
-WORKDIR /app
-# --- Copy full project context -------------------------------------------
-# Copy everything so uv sync can resolve the full project graph.
-# Unwanted paths are excluded via .dockerignore.
-COPY . /app/
-# --- Install dependencies via uv -----------------------------------------
-# Use --frozen to honour the checked-in uv.lock for reproducibility.
-# Falls back to a live resolve if uv.lock is absent (shouldn't happen).
-RUN if [ -f uv.lock ]; then \
         uv sync --frozen --no-editable; \
     else \
         uv sync --no-editable; \
     fi
-# --- Runtime environment -------------------------------------------------
-# Add the uv-managed venv to PATH so uvicorn / python resolve correctly.
 ENV PATH="/app/.venv/bin:$PATH"
-# PYTHONPATH → repo root so that both of these import patterns work:
-#   from models import AmlAction           (absolute, no package prefix)
-#   from server.AML_env_environment import AmlEnvironment
-ENV PYTHONPATH="/app"
-# Hugging Face Spaces mandates port 7860.
 EXPOSE 7860
-# --- Health check --------------------------------------------------------
 HEALTHCHECK --interval=30s --timeout=5s --start-period=15s --retries=3 \
     CMD curl -f http://localhost:7860/health || exit 1
-# --- Start server --------------------------------------------------------
-# Module path: server/app.py → server.app:app
-CMD ["uvicorn", "server.app:app", "--host", "0.0.0.0", "--port", "7860"]

+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+#
 # ============================================================
 # AML Investigator — OpenEnv Environment
 # Hugging Face Spaces compliant Docker image
 #   docker run -p 7860:7860 aml-env
 # ============================================================
+ARG BASE_IMAGE=ghcr.io/meta-pytorch/openenv-base:latest
+FROM ${BASE_IMAGE} AS builder
+WORKDIR /app
+# git is needed for uv to resolve any VCS dependencies
 RUN apt-get update && \
+    apt-get install -y --no-install-recommends git && \
     rm -rf /var/lib/apt/lists/*
+# Copy full build context (unwanted files pruned by .dockerignore)
+COPY . /app/env
+WORKDIR /app/env
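+# Ensure uv is available (the openenv-base image usually ships it; otherwise install it with curl — NOTE(review): this stage only installs git, so confirm curl exists in the base image)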
+RUN if ! command -v uv >/dev/null 2>&1; then \
+        curl -LsSf https://astral.sh/uv/install.sh | sh && \
+        mv /root/.local/bin/uv /usr/local/bin/uv && \
+        mv /root/.local/bin/uvx /usr/local/bin/uvx; \
+    fi
+# Install deps only (no project install yet) — passes --frozen when uv.lock exists so the lockfile is honoured
+RUN --mount=type=cache,target=/root/.cache/uv \
+    if [ -f uv.lock ]; then \
+        uv sync --frozen --no-install-project --no-editable; \
+    else \
+        uv sync --no-install-project --no-editable; \
+    fi
+# Install the project itself into the venv
+RUN --mount=type=cache,target=/root/.cache/uv \
+    if [ -f uv.lock ]; then \
         uv sync --frozen --no-editable; \
     else \
         uv sync --no-editable; \
     fi
+# ── Runtime stage ─────────────────────────────────────────────────────────────
+FROM ${BASE_IMAGE}
+WORKDIR /app
+# curl is required for the HEALTHCHECK; install it in the RUNTIME stage
+RUN apt-get update && \
+    apt-get install -y --no-install-recommends curl && \
+    rm -rf /var/lib/apt/lists/*
+# Copy venv and source from builder
+COPY --from=builder /app/env/.venv /app/.venv
+COPY --from=builder /app/env /app/env
+# Create unprivileged user (good practice for HF Spaces)
+RUN useradd -m -u 1000 appuser && \
+    chown -R appuser:appuser /app
+# The venv bin directory must be first on PATH
 ENV PATH="/app/.venv/bin:$PATH"
+# PYTHONPATH → /app/env (repo root inside container)
+# This makes both import styles work:
+#   from models import AmlAction             (bare)
+#   from server.AML_env_environment import … (prefixed)
+ENV PYTHONPATH="/app/env"
+# Hugging Face Spaces mandates port 7860
 EXPOSE 7860
+# Health check — verifiable with `docker inspect`
 HEALTHCHECK --interval=30s --timeout=5s --start-period=15s --retries=3 \
     CMD curl -f http://localhost:7860/health || exit 1
+WORKDIR /app/env
+USER appuser
+# Start the OpenEnv FastAPI server
+CMD ["uvicorn", "server.app:app", "--host", "0.0.0.0", "--port", "7860"]

README.md CHANGED Viewed

@@ -11,9 +11,11 @@ tags:
   - openenv
 ---
-# Aml Env Environment
-A simple test environment that echoes back messages. Perfect for testing the env APIs as well as demonstrating environment usage patterns.
 ## Quick Start
@@ -23,26 +25,33 @@ The simplest way to use the Aml Env environment is through the `AmlEnv` class:
 from AML_env import AmlAction, AmlEnv
 try:
-    # Create environment from Docker image
-    AML_envenv = AmlEnv.from_docker_image("AML_env-env:latest")
-    # Reset
-    result = AML_envenv.reset()
-    print(f"Reset: {result.observation.echoed_message}")
-    # Send multiple messages
-    messages = ["Hello, World!", "Testing echo", "Final message"]
-    for msg in messages:
-        result = AML_envenv.step(AmlAction(message=msg))
-        print(f"Sent: '{msg}'")
-        print(f"  → Echoed: '{result.observation.echoed_message}'")
-        print(f"  → Length: {result.observation.message_length}")
-        print(f"  → Reward: {result.reward}")
 finally:
-    # Always clean up
-    AML_envenv.close()
 ```
 That's it! The `AmlEnv.from_docker_image()` method handles:
@@ -57,7 +66,7 @@ Before using the environment, you need to build the Docker image:
 ```bash
 # From project root
-docker build -t AML_env-env:latest -f server/Dockerfile .
 ```
 ## Deploying to Hugging Face Spaces
@@ -118,23 +127,34 @@ The deployed space includes:
 ## Environment Details
-### Action
-**AmlAction**: Contains a single field
-- `message` (str) - The message to echo back
-### Observation
-**AmlObservation**: Contains the echo response and metadata
-- `echoed_message` (str) - The message echoed back
-- `message_length` (int) - Length of the message
-- `reward` (float) - Reward based on message length (length × 0.1)
-- `done` (bool) - Always False for echo environment
-- `metadata` (dict) - Additional info like step count
 ### Reward
-The reward is calculated as: `message_length × 0.1`
-- "Hi" → reward: 0.2
-- "Hello, World!" → reward: 1.3
-- Empty message → reward: 0.0
 ## Advanced Usage
@@ -239,17 +259,33 @@ uvicorn server.app:app --reload
 ```
 AML_env/
-├── .dockerignore         # Docker build exclusions
-├── __init__.py            # Module exports
-├── README.md              # This file
-├── openenv.yaml           # OpenEnv manifest
-├── pyproject.toml         # Project metadata and dependencies
-├── uv.lock                # Locked dependencies (generated)
-├── client.py              # AmlEnv client
-├── models.py              # Action and Observation models
-└── server/
-    ├── __init__.py        # Server module exports
-    ├── AML_env_environment.py  # Core environment logic
-    ├── app.py             # FastAPI application (HTTP + WebSocket endpoints)
-    └── Dockerfile         # Container image definition
 ```

   - openenv
 ---
+# AML Investigator Environment
+A financial crime investigation environment for Reinforcement Learning agents.
+The agent must query a mock banking system (transactions, KYC records) under a strict API budget
+to investigate flagged accounts and submit a final fraud/clear decision.
 ## Quick Start
 from AML_env import AmlAction, AmlEnv
 try:
+    # Create environment from Docker image (built from root Dockerfile)
+    env = AmlEnv.from_docker_image("aml-env:latest")
+    # Reset to a specific task
+    obs = env.reset(task="aml_easy")
+    print(f"Alert: {obs.observation.alert_details}")
+    print(f"Budget: {obs.observation.budget_remaining}")
+    # Query transactions
+    result = env.step(AmlAction(action={
+        "action_type": "query_transactions",
+        "account_id": "ACC-9001",
+        "limit": 10,
+        "offset": 0,
+    }))
+    print(f"Transactions: {result.observation.last_action_result}")
+    # Submit final decision
+    result = env.step(AmlAction(action={
+        "action_type": "submit_decision",
+        "decision": "CLEAR",
+        "evidence_links": [],
+    }))
+    print(f"Done: {result.done}, Reward: {result.reward}")
 finally:
+    env.close()
 ```
 That's it! The `AmlEnv.from_docker_image()` method handles:
 ```bash
 # From project root
+docker build -t aml-env:latest .
 ```
 ## Deploying to Hugging Face Spaces
 ## Environment Details
+### Action Space
+**AmlAction** wraps one of four tool calls (discriminated by `action_type`):
+| Tool | Fields | Description |
+|---|---|---|
+| `query_transactions` | `account_id`, `limit`, `offset` | Paginated transaction history for an account |
+| `search_transactions` | `account_id`, `keyword` | Search memo_text of transactions |
+| `get_kyc_record` | `entity_id` | Retrieve KYC data for an entity |
+| `submit_decision` | `decision` (`FRAUD`\|`CLEAR`), `evidence_links` | Final verdict — ends the episode |
+### Observation Space
+**AmlObservation** is returned after every `reset()` and `step()`:
+| Field | Type | Description |
+|---|---|---|
+| `alert_details` | `str` | The investigation mission (constant per episode) |
+| `budget_remaining` | `int` | API calls left before forced termination |
+| `last_action` | `str \| None` | Name of the last tool called |
+| `last_action_result` | `Any` | Payload returned by the last tool |
+| `error_message` | `str \| None` | Error string if the last action failed |
+| `done` | `bool` | Whether the episode has ended |
+| `reward` | `float` | Per-step reward signal |
 ### Reward
+- **Per step:** `-0.02` (efficiency penalty discourages random looping)
+- **Submit FRAUD (correct):** grader returns `0.4`–`1.0` depending on evidence quality
+- **Submit CLEAR (correctly dismissing a false-positive alert):** grader returns `1.0`
+- **Budget exhausted without submission:** episode ends with accumulated negative rewards
 ## Advanced Usage
 ```
 AML_env/
+├── Dockerfile                    # Container image (root, HF Spaces compliant)
+├── .dockerignore                 # Docker build exclusions
+├── .hfignore                     # HF Space upload exclusions
+├── .gitignore                    # Git exclusions
+├── __init__.py                   # Package exports (AmlEnv, AmlAction, AmlObservation)
+├── client.py                     # AmlEnv WebSocket client
+├── models.py                     # Pydantic action/observation schemas
+├── inference.py                  # Baseline RL agent (OpenAI client, [START]/[STEP]/[END] logs)
+├── openenv.yaml                  # OpenEnv manifest (tasks, graders, port)
+├── pyproject.toml                # Project metadata and uv dependencies
+├── uv.lock                       # Locked dependency graph
+├── README.md                     # This file (also HF Space card)
+├── data/
+│   ├── entities.json             # 312 KYC entity records
+│   ├── accounts.json             # 410 bank accounts
+│   └── transactions.json         # 5,079 transactions (haystack + fraud scenarios)
+├── graders/
+│   ├── __init__.py
+│   ├── aml_easy.py               # "The False Positive" grader
+│   ├── aml_medium.py             # "The Smurf Network" grader
+│   └── aml_hard.py               # "The Corporate Mirage" grader
+├── server/
+│   ├── __init__.py
+│   ├── AML_env_environment.py    # Core OpenEnv environment (reset/step/state)
+│   ├── app.py                    # FastAPI server (CORS, create_app wrapper)
+│   └── requirements.txt          # Pip fallback requirements
+└── tools/
+    ├── haystack.py               # Financial graph generator
+    └── tasks.json                # Manual fraud scenario definitions
 ```

client.py CHANGED Viewed

@@ -4,7 +4,11 @@
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
-"""Aml Env Environment Client."""
 from typing import Dict
@@ -15,83 +19,82 @@ from openenv.core.env_server.types import State
 from .models import AmlAction, AmlObservation
-class AmlEnv(
-    EnvClient[AmlAction, AmlObservation, State]
-):
     """
-    Client for the Aml Env Environment.
-    This client maintains a persistent WebSocket connection to the environment server,
-    enabling efficient multi-step interactions with lower latency.
-    Each client instance has its own dedicated environment session on the server.
-    Example:
-        >>> # Connect to a running server
-        >>> with AmlEnv(base_url="http://localhost:8000") as client:
-        ...     result = client.reset()
-        ...     print(result.observation.echoed_message)
-        ...
-        ...     result = client.step(AmlAction(message="Hello!"))
-        ...     print(result.observation.echoed_message)
-    Example with Docker:
-        >>> # Automatically start container and connect
-        >>> client = AmlEnv.from_docker_image("AML_env-env:latest")
         >>> try:
-        ...     result = client.reset()
-        ...     result = client.step(AmlAction(message="Test"))
         ... finally:
         ...     client.close()
     """
     def _step_payload(self, action: AmlAction) -> Dict:
         """
-        Convert AmlAction to JSON payload for step message.
         Args:
-            action: AmlAction instance
         Returns:
-            Dictionary representation suitable for JSON encoding
         """
-        return {
-            "message": action.message,
-        }
     def _parse_result(self, payload: Dict) -> StepResult[AmlObservation]:
         """
-        Parse server response into StepResult[AmlObservation].
         Args:
-            payload: JSON response data from server
         Returns:
-            StepResult with AmlObservation
         """
         obs_data = payload.get("observation", {})
         observation = AmlObservation(
-            echoed_message=obs_data.get("echoed_message", ""),
-            message_length=obs_data.get("message_length", 0),
             done=payload.get("done", False),
-            reward=payload.get("reward"),
-            metadata=obs_data.get("metadata", {}),
         )
         return StepResult(
             observation=observation,
-            reward=payload.get("reward"),
             done=payload.get("done", False),
         )
     def _parse_state(self, payload: Dict) -> State:
         """
-        Parse server response into State object.
         Args:
-            payload: JSON response from state request
         Returns:
-            State object with episode_id and step_count
         """
         return State(
             episode_id=payload.get("episode_id"),

 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
+"""AML Investigator Environment Client.
+High-level WebSocket client that wraps the OpenEnv EnvClient base class
+with AML-specific action/observation types.
+"""
 from typing import Dict
 from .models import AmlAction, AmlObservation
+class AmlEnv(EnvClient[AmlAction, AmlObservation, State]):
     """
+    WebSocket client for the AML Investigator environment.
+    Maintains a persistent WebSocket connection to the environment server,
+    enabling efficient multi-step investigations with lower per-step latency.
+    Example (Docker):
+        >>> client = AmlEnv.from_docker_image("aml-env:latest")
         >>> try:
+        ...     obs = client.reset(task="aml_easy")
+        ...     result = client.step(AmlAction(action={
+        ...         "action_type": "query_transactions",
+        ...         "account_id": "ACC-9001"
+        ...     }))
+        ...     print(result.observation.last_action_result)
         ... finally:
         ...     client.close()
+    Example (existing server):
+        >>> with AmlEnv(base_url="http://localhost:7860") as env:
+        ...     obs = env.reset(task="aml_easy")
+        ...     result = env.step(AmlAction(action={
+        ...         "action_type": "submit_decision",
+        ...         "decision": "CLEAR",
+        ...         "evidence_links": []
+        ...     }))
     """
     def _step_payload(self, action: AmlAction) -> Dict:
         """
+        Serialize AmlAction to the JSON dict sent over the WebSocket.
         Args:
+            action: Typed AmlAction wrapper containing the specific tool call.
         Returns:
+            Dict with the nested ``action`` key the server expects.
         """
+        return action.model_dump()
     def _parse_result(self, payload: Dict) -> StepResult[AmlObservation]:
         """
+        Deserialize the server's JSON response into a typed StepResult.
         Args:
+            payload: Raw JSON response dict from the server.
         Returns:
+            StepResult containing an AmlObservation.
         """
         obs_data = payload.get("observation", {})
         observation = AmlObservation(
+            alert_details=obs_data.get("alert_details", ""),
+            budget_remaining=obs_data.get("budget_remaining", 0),
+            last_action=obs_data.get("last_action"),
+            last_action_result=obs_data.get("last_action_result"),
+            error_message=obs_data.get("error_message"),
             done=payload.get("done", False),
+            reward=payload.get("reward", 0.0),
         )
         return StepResult(
             observation=observation,
+            reward=payload.get("reward", 0.0),
             done=payload.get("done", False),
         )
     def _parse_state(self, payload: Dict) -> State:
         """
+        Deserialize the server's /state response into a State object.
         Args:
+            payload: Raw JSON response dict from the server.
         Returns:
+            State with episode_id and step_count.
         """
         return State(
             episode_id=payload.get("episode_id"),

graders/__init__.py ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ # Graders package — makes graders/ a proper Python package so OpenEnv can
2	+ # resolve grader paths like "graders.aml_easy:grade" as module imports.

inference.py CHANGED Viewed

@@ -15,14 +15,14 @@ from openenv.core.env_server.interfaces import Environment
 from server.AML_env_environment import AmlEnvironment
 from models import AmlAction
-API_KEY = os.getenv("HF_TOKEN") or os.getenv("OPENAI_API_KEY")
-API_BASE_URL = os.getenv("API_BASE_URL") or "https://router.huggingface.co/v1"
-MODEL_NAME = os.getenv("MODEL_NAME") or "Qwen/Qwen2.5-72B-Instruct"
 # Must match openenv.yaml EXACTLY
 TASKS = ["aml_easy", "aml_medium", "aml_hard"]
 BENCHMARK = "aml_investigator"
-MAX_STEPS = 25 # High enough to allow the budget to kill the episode organically
 SYSTEM_PROMPT = textwrap.dedent(
     """
@@ -125,7 +125,7 @@ async def main() -> None:
                     break
             # Calculate a baseline score for the stdout logs (Graders handle real scoring)
-            score = sum(rewards) + 1.0 if "submit_decision" in obs.last_action else 0.0
             score = min(max(score, 0.0), 1.0)
             success = score > 0.5

 from server.AML_env_environment import AmlEnvironment
 from models import AmlAction
+API_KEY = os.getenv("HF_TOKEN") or "lm-studio"
+API_BASE_URL = os.getenv("API_BASE_URL") or "https://router.huggingface.co/v1" or "http://localhost:1234/v1"
+MODEL_NAME = os.getenv("MODEL_NAME") or "openai/gpt-oss-20b"
 # Must match openenv.yaml EXACTLY
 TASKS = ["aml_easy", "aml_medium", "aml_hard"]
 BENCHMARK = "aml_investigator"
+MAX_STEPS = 25
 SYSTEM_PROMPT = textwrap.dedent(
     """
                     break
             # Calculate a baseline score for the stdout logs (Graders handle real scoring)
+            score = sum(rewards) + 1.0 if "submit_decision" in (obs.last_action or "") else 0.0
             score = min(max(score, 0.0), 1.0)
             success = score > 0.5