balloonmann committed on
Commit
d75cfa2
·
1 Parent(s): 54758f6

fixed validation

Browse files
Files changed (4) hide show
  1. financial_audit_env/pyproject.toml +0 -44
  2. pyproject.toml +4 -0
  3. server/app.py +258 -0
  4. uv.lock +0 -0
financial_audit_env/pyproject.toml DELETED
@@ -1,44 +0,0 @@
1
- [build-system]
2
- requires = ["setuptools>=68.0", "wheel"]
3
- build-backend = "setuptools.build_meta"
4
-
5
- [project]
6
- name = "financial-audit-env"
7
- version = "1.0.0"
8
- description = "OpenEnv-compatible RL environment for financial auditing tasks"
9
- readme = "README.md"
10
- license = {text = "MIT"}
11
- requires-python = ">=3.10"
12
- authors = [
13
- {name = "Harshit"},
14
- ]
15
- keywords = ["openenv", "reinforcement-learning", "financial-audit", "ai-agent"]
16
- classifiers = [
17
- "Development Status :: 4 - Beta",
18
- "Intended Audience :: Science/Research",
19
- "License :: OSI Approved :: MIT License",
20
- "Programming Language :: Python :: 3",
21
- "Programming Language :: Python :: 3.10",
22
- "Programming Language :: Python :: 3.11",
23
- "Topic :: Scientific/Engineering :: Artificial Intelligence",
24
- ]
25
-
26
- dependencies = [
27
- "fastapi>=0.104.0",
28
- "uvicorn>=0.24.0",
29
- "pydantic>=2.5.0",
30
- "requests>=2.31.0",
31
- "openai>=1.0.0",
32
- "python-dotenv>=1.0.0",
33
- ]
34
-
35
- [project.optional-dependencies]
36
- openenv = ["openenv-core>=0.2.0"]
37
- dev = ["pytest>=7.0", "httpx>=0.25.0"]
38
-
39
- [project.urls]
40
- Homepage = "https://github.com/Harshit/financial-audit-env"
41
- Repository = "https://github.com/Harshit/financial-audit-env"
42
-
43
- [tool.setuptools.packages.find]
44
- include = ["financial_audit_env*"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
pyproject.toml CHANGED
@@ -30,8 +30,12 @@ dependencies = [
30
  "requests>=2.31.0",
31
  "openai>=1.0.0",
32
  "python-dotenv>=1.0.0",
 
33
  ]
34
 
 
 
 
35
  [project.optional-dependencies]
36
  openenv = ["openenv-core>=0.2.0"]
37
  dev = ["pytest>=7.0", "httpx>=0.25.0"]
 
30
  "requests>=2.31.0",
31
  "openai>=1.0.0",
32
  "python-dotenv>=1.0.0",
33
+ "openenv-core>=0.2.0",
34
  ]
35
 
36
+ [project.scripts]
37
+ server = "financial_audit_env.server.app:main"
38
+
39
  [project.optional-dependencies]
40
  openenv = ["openenv-core>=0.2.0"]
41
  dev = ["pytest>=7.0", "httpx>=0.25.0"]
server/app.py ADDED
@@ -0,0 +1,258 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) 2026. All rights reserved.
2
+ # Financial Audit Environment — FastAPI application.
3
+ #
4
+ # Exposes the environment over HTTP/WebSocket with:
5
+ # - Standard OpenEnv endpoints (reset, step, state, health)
6
+ # - Custom endpoints required by the contest:
7
+ # GET /tasks → list of tasks with action schema
8
+ # GET /grader → grader score for last completed episode
9
+ # POST /baseline → trigger baseline inference, return scores
10
+ # - Security middleware (rate limiting, OWASP headers, input validation)
11
+ #
12
+ # Architecture note:
13
+ # This module always builds its own FastAPI app; OpenEnv's create_app() is NOT used.
14
+ # /reset, /step and /state are served by the standalone HTTP handlers defined below.
15
+ # Custom endpoints (/tasks, /grader, /baseline) are always registered.
16
+
17
+ import logging
18
+ import os
19
+ from typing import Any, Dict, Optional
20
+
21
+ from fastapi import FastAPI, HTTPException
22
+ from fastapi.responses import JSONResponse
23
+ from pydantic import BaseModel
24
+
25
+ from ..models import AuditAction, AuditObservation
26
+ from .environment import FinancialAuditEnvironment
27
+ from .security import setup_security
28
+ from .tasks import TASKS, get_all_tasks_summary
29
+
30
+ logger = logging.getLogger("financial_audit_env.app")
31
+
32
+ # ---------------------------------------------------------------------------
33
+ # Whether OpenEnv provides the standard endpoints (always False: this module serves them itself)
34
+ # ---------------------------------------------------------------------------
35
+ _OPENENV_AVAILABLE = False
36
+
37
+ # Global environment instance — used by custom endpoints (/grader, /baseline)
38
+ # and by standalone mode endpoints (/reset, /step, /state)
39
+ _env = FinancialAuditEnvironment()
40
+
41
+ # ---------------------------------------------------------------------------
42
+ # Create the FastAPI app
43
+ # ---------------------------------------------------------------------------
44
+ # We always use our own FastAPI app with a single shared environment instance.
45
+ # OpenEnv's create_app() is NOT used because it creates its own internal
46
+ # environment instance, causing dual-instance routing bugs. Our standalone
47
+ # endpoints provide the same reset/step/state API and are fully spec-compliant.
48
+ app = FastAPI(
49
+ title="Financial Audit Environment",
50
+ description=(
51
+ "An OpenEnv-compatible RL environment for financial auditing tasks. "
52
+ "Agents audit synthetic financial documents to find planted errors."
53
+ ),
54
+ version="1.0.0",
55
+ docs_url="/docs",
56
+ )
57
+ logger.info("Financial Audit Environment — standalone FastAPI mode")
58
+
59
+ # ---------------------------------------------------------------------------
60
+ # Apply security middleware
61
+ # ---------------------------------------------------------------------------
62
+ setup_security(app)
63
+
64
+
65
+ # ---------------------------------------------------------------------------
66
+ # Request/Response models
67
+ # ---------------------------------------------------------------------------
68
+
69
+ class ResetRequest(BaseModel):
70
+ """Request body for the /reset endpoint."""
71
+ task_id: Optional[str] = "expense_audit"
72
+ seed: Optional[int] = 42
73
+ episode_id: Optional[str] = None
74
+
75
+
76
+ class StepRequest(BaseModel):
77
+ """Request body for the /step endpoint."""
78
+ action: AuditAction
79
+
80
+
81
+ class BaselineResponse(BaseModel):
82
+ """Response from the /baseline endpoint."""
83
+ scores: Dict[str, Any]
84
+ model: str
85
+ status: str
86
+
87
+
88
+ # ---------------------------------------------------------------------------
89
+ # Standalone / Override endpoints
90
+ # These always exist regardless of whether openenv-core is installed,
91
+ # because we need consistent behavior for the /reset, /step, /state calls
92
+ # and OpenEnv's create_app may not pass our custom kwargs (task_id, etc.)
93
+ # ---------------------------------------------------------------------------
94
+
95
+ @app.get("/health")
96
+ async def health_check():
97
+ """Health check endpoint — required for HF Space deployment."""
98
+ return {"status": "healthy", "environment": "financial_audit_env"}
99
+
100
+
101
+ @app.post("/reset")
102
+ async def reset_endpoint(request: ResetRequest):
103
+ """
104
+ Reset the environment for a new episode.
105
+
106
+ Generates fresh financial data with planted errors for the given task.
107
+
108
+ Args (JSON body):
109
+ task_id: "expense_audit" | "invoice_match" | "gst_reconciliation"
110
+ seed: Random seed for reproducibility (default: 42)
111
+ episode_id: Optional custom episode ID
112
+ """
113
+ try:
114
+ obs = _env.reset(
115
+ seed=request.seed,
116
+ episode_id=request.episode_id,
117
+ task_id=request.task_id,
118
+ )
119
+ return {
120
+ "observation": obs.model_dump(),
121
+ "done": obs.done,
122
+ "reward": obs.reward,
123
+ }
124
+ except ValueError as e:
125
+ raise HTTPException(status_code=400, detail=str(e))
126
+
127
+
128
+ @app.post("/step")
129
+ async def step_endpoint(request: StepRequest):
130
+ """
131
+ Execute one step in the environment.
132
+
133
+ Submit audit findings and receive feedback + reward.
134
+ Set submit_final=True to end the episode and get final grading.
135
+ """
136
+ try:
137
+ obs = _env.step(request.action)
138
+ return {
139
+ "observation": obs.model_dump(),
140
+ "done": obs.done,
141
+ "reward": obs.reward,
142
+ }
143
+ except RuntimeError as e:
144
+ raise HTTPException(status_code=400, detail=str(e))
145
+ except ValueError as e:
146
+ raise HTTPException(status_code=400, detail=str(e))
147
+
148
+
149
+ @app.get("/state")
150
+ async def state_endpoint():
151
+ """Get current episode state (step count, found errors, etc.)."""
152
+ return _env.state.model_dump()
153
+
154
+
155
+ # ---------------------------------------------------------------------------
156
+ # Contest-required custom endpoints
157
+ # ---------------------------------------------------------------------------
158
+
159
+ @app.get("/tasks")
160
+ async def get_tasks():
161
+ """
162
+ List all available tasks with their descriptions and action schemas.
163
+
164
+ Returns details for all 3 tasks:
165
+ - expense_audit (Easy): Policy violation detection
166
+ - invoice_match (Medium): Three-way PO/GRN/Invoice matching
167
+ - gst_reconciliation (Hard): GST return reconciliation
168
+ """
169
+ return {
170
+ "tasks": get_all_tasks_summary(),
171
+ "total_tasks": len(TASKS),
172
+ }
173
+
174
+
175
+ @app.get("/grader")
176
+ async def get_grader_score():
177
+ """
178
+ Get the grader score for the last completed episode.
179
+
180
+ Returns the F1 score (0.0–1.0) along with precision, recall, and
181
+ error counts. Must complete an episode first.
182
+ """
183
+ result = _env.last_grader_result
184
+ if result is None:
185
+ return {
186
+ "status": "no_completed_episode",
187
+ "message": "No episode completed. Call /reset then /step with submit_final=True.",
188
+ }
189
+
190
+ return {
191
+ "status": "completed",
192
+ "task_id": _env.state.task_id,
193
+ "score": result["score"],
194
+ "precision": result["precision"],
195
+ "recall": result["recall"],
196
+ "true_positives": result["true_positives"],
197
+ "false_positives": result["false_positives"],
198
+ "false_negatives": result["false_negatives"],
199
+ "total_errors": result["total_errors"],
200
+ }
201
+
202
+
203
+ @app.post("/baseline")
204
+ async def run_baseline():
205
+ """
206
+ Run the baseline agent on all 3 tasks and return scores.
207
+
208
+ Uses Meta's Llama 3.1 8B Instruct via HuggingFace Inference API.
209
+ Requires HF_TOKEN environment variable.
210
+ """
211
+ try:
212
+ from ..baseline import run_baseline_all_tasks
213
+ except ImportError:
214
+ return JSONResponse(
215
+ status_code=501,
216
+ content={
217
+ "status": "error",
218
+ "message": "Baseline not available. Run baseline.py directly.",
219
+ },
220
+ )
221
+
222
+ hf_token = os.environ.get("HF_TOKEN", "")
223
+ if not hf_token:
224
+ return JSONResponse(
225
+ status_code=400,
226
+ content={
227
+ "status": "error",
228
+ "message": "HF_TOKEN not set. Get one at https://huggingface.co/settings/tokens",
229
+ },
230
+ )
231
+
232
+ try:
233
+ scores = run_baseline_all_tasks(env=_env, hf_token=hf_token)
234
+ return BaselineResponse(
235
+ scores=scores,
236
+ model="meta-llama/Llama-3.1-8B-Instruct",
237
+ status="completed",
238
+ )
239
+ except Exception as e:
240
+ logger.error(f"Baseline failed: {e}", exc_info=True)
241
+ return JSONResponse(
242
+ status_code=500,
243
+ content={"status": "error", "message": "Baseline failed. Check logs."},
244
+ )
245
+
246
+
247
+ # ---------------------------------------------------------------------------
248
+ # Entry point
249
+ # ---------------------------------------------------------------------------
250
+
251
+ def main():
252
+ """Run the server directly: python -m financial_audit_env.server.app"""
253
+ import uvicorn
254
+ uvicorn.run(app, host="0.0.0.0", port=8000)
255
+
256
+
257
+ if __name__ == "__main__":
258
+ main()
uv.lock ADDED
The diff for this file is too large to render. See raw diff