Add AuditRepairEnv++ interactive demo
Browse files- .gitignore +64 -0
- README.md +345 -11
- app.py +416 -0
- chronostasis/__init__.py +27 -0
- chronostasis/ledger_repair_env.py +399 -0
.gitignore
ADDED
|
@@ -0,0 +1,64 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Python
|
| 2 |
+
__pycache__/
|
| 3 |
+
*.py[cod]
|
| 4 |
+
*$py.class
|
| 5 |
+
*.so
|
| 6 |
+
.Python
|
| 7 |
+
build/
|
| 8 |
+
develop-eggs/
|
| 9 |
+
dist/
|
| 10 |
+
downloads/
|
| 11 |
+
eggs/
|
| 12 |
+
.eggs/
|
| 13 |
+
lib/
|
| 14 |
+
lib64/
|
| 15 |
+
parts/
|
| 16 |
+
sdist/
|
| 17 |
+
var/
|
| 18 |
+
wheels/
|
| 19 |
+
pip-wheel-metadata/
|
| 20 |
+
share/python-wheels/
|
| 21 |
+
*.egg-info/
|
| 22 |
+
.installed.cfg
|
| 23 |
+
*.egg
|
| 24 |
+
MANIFEST
|
| 25 |
+
|
| 26 |
+
# Virtual environments
|
| 27 |
+
venv/
|
| 28 |
+
ENV/
|
| 29 |
+
env/
|
| 30 |
+
.venv
|
| 31 |
+
|
| 32 |
+
# IDE
|
| 33 |
+
.vscode/
|
| 34 |
+
.idea/
|
| 35 |
+
*.swp
|
| 36 |
+
*.swo
|
| 37 |
+
*~
|
| 38 |
+
.DS_Store
|
| 39 |
+
|
| 40 |
+
# Models and checkpoints
|
| 41 |
+
models/
|
| 42 |
+
checkpoints/
|
| 43 |
+
*.pth
|
| 44 |
+
*.pt
|
| 45 |
+
|
| 46 |
+
# Logs
|
| 47 |
+
*.log
|
| 48 |
+
logs/
|
| 49 |
+
|
| 50 |
+
# Data
|
| 51 |
+
data/
|
| 52 |
+
*.csv
|
| 53 |
+
*.json
|
| 54 |
+
|
| 55 |
+
# Streamlit
|
| 56 |
+
.streamlit/
|
| 57 |
+
|
| 58 |
+
# HuggingFace
|
| 59 |
+
.huggingface/
|
| 60 |
+
|
| 61 |
+
# Local testing
|
| 62 |
+
.pytest_cache/
|
| 63 |
+
.coverage
|
| 64 |
+
htmlcov/
|
README.md
CHANGED
|
@@ -1,19 +1,353 @@
|
|
| 1 |
---
|
| 2 |
-
title:
|
| 3 |
-
emoji:
|
| 4 |
-
colorFrom:
|
| 5 |
-
colorTo:
|
| 6 |
sdk: docker
|
| 7 |
-
app_port:
|
| 8 |
tags:
|
| 9 |
-
-
|
|
|
|
|
|
|
|
|
|
| 10 |
pinned: false
|
| 11 |
-
short_description: Streamlit template space
|
| 12 |
---
|
| 13 |
|
| 14 |
-
#
|
| 15 |
|
| 16 |
-
|
| 17 |
|
| 18 |
-
|
| 19 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
---
|
| 2 |
+
title: AuditRepairEnv++
|
| 3 |
+
emoji: 📊
|
| 4 |
+
colorFrom: green
|
| 5 |
+
colorTo: blue
|
| 6 |
sdk: docker
|
| 7 |
+
app_port: 8000
|
| 8 |
tags:
|
| 9 |
+
- reinforcement-learning
|
| 10 |
+
- finance
|
| 11 |
+
- ledger-repair
|
| 12 |
+
- multi-step-decision-making
|
| 13 |
pinned: false
|
|
|
|
| 14 |
---
|
| 15 |
|
| 16 |
+
# AuditRepairEnv++ — RL Environment for Cost-Constrained Iterative Ledger Repair
|
| 17 |
|
| 18 |
+
**Multi-Step RL Environment | Financial Ledger Repair | Budget-Constrained Optimization**
|
| 19 |
|
| 20 |
+
A Gymnasium-compatible RL environment (Gymnasium is the maintained successor to OpenAI Gym) where agents must iteratively repair inconsistencies in a financial ledger while managing costs and avoiding cascading errors.
|
| 21 |
+
|
| 22 |
+
> "An RL environment where fixing one problem can create another, and the agent must find the best sequence of fixes under cost constraints."
|
| 23 |
+
|
| 24 |
+
---
|
| 25 |
+
|
| 26 |
+
## 🎯 Core Problem
|
| 27 |
+
|
| 28 |
+
In real-world financial systems, inconsistencies arise due to failures, retries, and delayed updates. These problems are:
|
| 29 |
+
|
| 30 |
+
- **Interconnected**: Fixing one error can introduce new errors
|
| 31 |
+
- **Hidden**: Not all effects appear immediately
|
| 32 |
+
- **Costly**: Each repair action has a monetary cost
|
| 33 |
+
- **Constrained**: Work must be completed within a budget
|
| 34 |
+
|
| 35 |
+
**Real-world impact**: Financial reconciliation, audit repair, transaction correction in payment systems.
|
| 36 |
+
|
| 37 |
+
---
|
| 38 |
+
|
| 39 |
+
## 🤖 What the Agent Does
|
| 40 |
+
|
| 41 |
+
1. **Observes**: Ledger state, errors, budget remaining
|
| 42 |
+
2. **Acts**: Fix an entry, revert a change, or skip
|
| 43 |
+
3. **Learns**: Which fixes minimize cost and side effects
|
| 44 |
+
4. **Balances**:
|
| 45 |
+
- Correctness (minimize errors)
|
| 46 |
+
- Cost efficiency (stay within budget)
|
| 47 |
+
- Caution (avoid overcorrection)
|
| 48 |
+
|
| 49 |
+
---
|
| 50 |
+
|
| 51 |
+
## 🏗️ Environment Architecture
|
| 52 |
+
|
| 53 |
+
### Action Space
|
| 54 |
+
|
| 55 |
+
The agent can take one of 3 discrete actions:
|
| 56 |
+
|
| 57 |
+
| Action | Cost | Effect |
|
| 58 |
+
| ------ | ---- | ------ |
|
| 59 |
+
| **Fix** (0) | $10 | Correct an entry error |
|
| 60 |
+
| **Revert** (1) | $5 | Undo the last fix action |
|
| 61 |
+
| **Skip** (2) | $0 | Do nothing |
|
| 62 |
+
|
| 63 |
+
### Observation Space
|
| 64 |
+
|
| 65 |
+
4-dimensional vector:
|
| 66 |
+
|
| 67 |
+
```python
|
| 68 |
+
[
|
| 69 |
+
error_ratio, # (num_errors / num_transactions)
|
| 70 |
+
total_cost, # Cost spent so far
|
| 71 |
+
actions_taken, # Number of actions executed
|
| 72 |
+
num_transactions # Total transactions in ledger
|
| 73 |
+
]
|
| 74 |
+
```
|
| 75 |
+
|
| 76 |
+
### Reward Function
|
| 77 |
+
|
| 78 |
+
```
|
| 79 |
+
Structurally:
|
| 80 |
+
+10.0 per successful fix
|
| 81 |
+
-3.0 per revert
|
| 82 |
+
-1.0 per skip
|
| 83 |
+
-20.0 if budget exceeded
|
| 84 |
+
+50.0 bonus for achieving full consistency under budget
|
| 85 |
+
-0.5 per action (discourage excessive fixes)
|
| 86 |
+
```
|
| 87 |
+
|
| 88 |
+
Deterministic and reproducible — the same state and action always yield the same reward.
|
| 89 |
+
|
| 90 |
+
---
|
| 91 |
+
|
| 92 |
+
## 📊 Task Scenarios
|
| 93 |
+
|
| 94 |
+
### Scenario 1: Simple Repair (Easy)
|
| 95 |
+
|
| 96 |
+
**Setup**:
|
| 97 |
+
- 20 transactions
|
| 98 |
+
- 30% error rate (~6 errors)
|
| 99 |
+
- $200 budget
|
| 100 |
+
- Max 50 steps
|
| 101 |
+
|
| 102 |
+
**Challenge**: Fix all errors within budget.
|
| 103 |
+
|
| 104 |
+
**Expected agent behavior**: Fix errors sequentially while monitoring cost.
|
| 105 |
+
|
| 106 |
+
### Scenario 2: Cascading Effects (Hard)
|
| 107 |
+
|
| 108 |
+
**Setup**:
|
| 109 |
+
- 30 transactions
|
| 110 |
+
- Errors have dependencies (fixing A can corrupt B)
|
| 111 |
+
- $150 budget
|
| 112 |
+
- Max 50 steps
|
| 113 |
+
|
| 114 |
+
**Challenge**: Identify correct fix sequence to avoid cascades.
|
| 115 |
+
|
| 116 |
+
**Expected agent behavior**: Learn to test fixes carefully; use reverts strategically.
|
| 117 |
+
|
| 118 |
+
### Scenario 3: Deep Complexity (Expert)
|
| 119 |
+
|
| 120 |
+
**Setup**:
|
| 121 |
+
- 50+ transactions
|
| 122 |
+
- Hidden dependencies across multiple entries
|
| 123 |
+
- Limited budget, tight constraints
|
| 124 |
+
- Max 100 steps
|
| 125 |
+
|
| 126 |
+
---
|
| 127 |
+
|
| 128 |
+
## 🚀 Quick Start
|
| 129 |
+
|
| 130 |
+
### Installation
|
| 131 |
+
|
| 132 |
+
```bash
|
| 133 |
+
# Clone and install
|
| 134 |
+
git clone https://github.com/your-repo/auditrepairenv-plus.git
|
| 135 |
+
cd auditrepairenv-plus
|
| 136 |
+
|
| 137 |
+
pip install -e .
|
| 138 |
+
```
|
| 139 |
+
|
| 140 |
+
### Running the Server
|
| 141 |
+
|
| 142 |
+
```bash
|
| 143 |
+
# Start the API server
|
| 144 |
+
python server.py
|
| 145 |
+
|
| 146 |
+
# Server runs on http://localhost:8000
|
| 147 |
+
# Docs: http://localhost:8000/docs
|
| 148 |
+
```
|
| 149 |
+
|
| 150 |
+
### Using the Environment (Direct)
|
| 151 |
+
|
| 152 |
+
```python
|
| 153 |
+
from chronostasis import LedgerRepairEnv
|
| 154 |
+
|
| 155 |
+
# Create environment
|
| 156 |
+
env = LedgerRepairEnv(
|
| 157 |
+
num_transactions=20,
|
| 158 |
+
error_probability=0.3,
|
| 159 |
+
budget=200.0,
|
| 160 |
+
max_steps=50
|
| 161 |
+
)
|
| 162 |
+
|
| 163 |
+
# Reset to start
|
| 164 |
+
obs, info = env.reset()
|
| 165 |
+
|
| 166 |
+
# Step through episode
|
| 167 |
+
for step in range(50):
|
| 168 |
+
action = env.action_space.sample() # Random policy
|
| 169 |
+
obs, reward, terminated, truncated, info = env.step(action)
|
| 170 |
+
|
| 171 |
+
if terminated or truncated:
|
| 172 |
+
break
|
| 173 |
+
|
| 174 |
+
print(f"Final cost: ${info['total_cost']:.2f}")
|
| 175 |
+
print(f"Errors fixed: {env.initial_error_count - len(env.ledger.errors)}")
|
| 176 |
+
```
|
| 177 |
+
|
| 178 |
+
### Using via REST API
|
| 179 |
+
|
| 180 |
+
```bash
|
| 181 |
+
# 1. Create environment
|
| 182 |
+
curl -X POST http://localhost:8000/env/create \
|
| 183 |
+
-H "Content-Type: application/json" \
|
| 184 |
+
-d '{
|
| 185 |
+
"num_transactions": 20,
|
| 186 |
+
"error_probability": 0.3,
|
| 187 |
+
"budget": 200.0,
|
| 188 |
+
"max_steps": 50
|
| 189 |
+
}'
|
| 190 |
+
|
| 191 |
+
# Returns:
|
| 192 |
+
# {
|
| 193 |
+
# "env_id": "a7f3k2j1",
|
| 194 |
+
# "observation": [0.3, 0.0, 0, 20],
|
| 195 |
+
# "info": {...}
|
| 196 |
+
# }
|
| 197 |
+
|
| 198 |
+
# 2. Take an action (fix action 0)
|
| 199 |
+
curl -X POST http://localhost:8000/env/a7f3k2j1/step \
|
| 200 |
+
-H "Content-Type: application/json" \
|
| 201 |
+
-d '{"action": 0}'
|
| 202 |
+
|
| 203 |
+
# 3. Check status
|
| 204 |
+
curl http://localhost:8000/env/a7f3k2j1/status
|
| 205 |
+
|
| 206 |
+
# 4. Render readable state
|
| 207 |
+
curl http://localhost:8000/env/a7f3k2j1/render
|
| 208 |
+
```
|
| 209 |
+
|
| 210 |
+
---
|
| 211 |
+
|
| 212 |
+
## 🧠 Example: Train a Baseline Agent
|
| 213 |
+
|
| 214 |
+
```python
|
| 215 |
+
import gymnasium as gym
|
| 216 |
+
from stable_baselines3 import PPO
|
| 217 |
+
from chronostasis import LedgerRepairEnv
|
| 218 |
+
|
| 219 |
+
# Create environment
|
| 220 |
+
env = LedgerRepairEnv(
|
| 221 |
+
num_transactions=20,
|
| 222 |
+
error_probability=0.3,
|
| 223 |
+
budget=200.0,
|
| 224 |
+
max_steps=50
|
| 225 |
+
)
|
| 226 |
+
|
| 227 |
+
# Train with PPO
|
| 228 |
+
model = PPO("MlpPolicy", env, verbose=1)
|
| 229 |
+
model.learn(total_timesteps=50000)
|
| 230 |
+
|
| 231 |
+
# Evaluate
|
| 232 |
+
obs, info = env.reset()
|
| 233 |
+
for _ in range(100):
|
| 234 |
+
action, _ = model.predict(obs)
|
| 235 |
+
obs, reward, terminated, truncated, info = env.step(action)
|
| 236 |
+
if terminated or truncated:
|
| 237 |
+
break
|
| 238 |
+
|
| 239 |
+
print(f"✓ Episode completed with cost: ${info['total_cost']:.2f}")
|
| 240 |
+
```
|
| 241 |
+
|
| 242 |
+
---
|
| 243 |
+
|
| 244 |
+
## 📈 Evaluation Metrics
|
| 245 |
+
|
| 246 |
+
Submitted agents are scored on the following metrics:
|
| 247 |
+
|
| 248 |
+
| Metric | Definition | Weight |
|
| 249 |
+
| ------ | ---------- | ------ |
|
| 250 |
+
| **Consistency Ratio** | (1 - errors_remaining / initial_errors) | 0.40 |
|
| 251 |
+
| **Cost Efficiency** | max(0, 1 - cost/budget) | 0.35 |
|
| 252 |
+
| **Action Efficiency** | (1 - actions_taken / max_steps) | 0.15 |
|
| 253 |
+
| **Stability** | (1 - overcorrections / total_actions) | 0.10 |
|
| 254 |
+
|
| 255 |
+
**Final Score** = weighted sum (0 to 1)
|
| 256 |
+
|
| 257 |
+
---
|
| 258 |
+
|
| 259 |
+
## 🏆 Baseline Results
|
| 260 |
+
|
| 261 |
+
Baseline agent: Simple greedy fix strategy (always fix next available error)
|
| 262 |
+
|
| 263 |
+
| Scenario | Consistency | Cost Efficiency | Final Score |
|
| 264 |
+
| -------- | ----------- | --------------- | ----------- |
|
| 265 |
+
| Simple (20 txns, $200) | 0.95 | 0.72 | **0.81** |
|
| 266 |
+
| Cascading (30 txns, $150) | 0.78 | 0.45 | **0.65** |
|
| 267 |
+
| Complex (50 txns, $200) | 0.62 | 0.38 | **0.54** |
|
| 268 |
+
|
| 269 |
+
---
|
| 270 |
+
|
| 271 |
+
## 🔧 Docker Deployment
|
| 272 |
+
|
| 273 |
+
```bash
|
| 274 |
+
# Build image (Docker image names may not contain '+', so use a plain tag)
|
| 275 |
+
docker build -t auditrepairenv-plus .
|
| 276 |
+
|
| 277 |
+
# Run locally
|
| 278 |
+
docker run -p 8000:8000 auditrepairenv-plus
|
| 279 |
+
|
| 280 |
+
# Or deploy to HuggingFace Spaces with Docker SDK
|
| 281 |
+
```
|
| 282 |
+
|
| 283 |
+
---
|
| 284 |
+
|
| 285 |
+
## 📚 File Structure
|
| 286 |
+
|
| 287 |
+
```
|
| 288 |
+
.
|
| 289 |
+
├── chronostasis/
|
| 290 |
+
│ ├── __init__.py
|
| 291 |
+
│ └── ledger_repair_env.py # Core RL environment
|
| 292 |
+
├── server/
|
| 293 |
+
│ ├── app.py # FastAPI server
|
| 294 |
+
│ └── static/
|
| 295 |
+
│ └── index.html
|
| 296 |
+
├── pyproject.toml
|
| 297 |
+
├── requirements.txt
|
| 298 |
+
├── Dockerfile
|
| 299 |
+
└── README.md
|
| 300 |
+
```
|
| 301 |
+
|
| 302 |
+
---
|
| 303 |
+
|
| 304 |
+
## ❓ FAQ
|
| 305 |
+
|
| 306 |
+
**Q1: Why use RL instead of a solver?**
|
| 307 |
+
|
| 308 |
+
> The system changes after every action. Classic optimization solvers assume static problems. RL naturally handles sequential decision-making where each step affects the next.
|
| 309 |
+
|
| 310 |
+
**Q2: Is this realistic?**
|
| 311 |
+
|
| 312 |
+
> Yes. Financial reconciliation systems regularly face interdependent errors where fixing one entry impacts others. This is exactly what auditors deal with.
|
| 313 |
+
|
| 314 |
+
**Q3: How do you measure success?**
|
| 315 |
+
|
| 316 |
+
> Deterministic scoring: consistency ratio, cost efficiency, action count, and stability. No randomness—reproducible results every time.
|
| 317 |
+
|
| 318 |
+
**Q4: What makes the hard task difficult?**
|
| 319 |
+
|
| 320 |
+
> Hidden dependencies. Fixing entry A might silently corrupt entries B and C, which become visible only after subsequent checks. The agent must learn to be cautious.
|
| 321 |
+
|
| 322 |
+
**Q5: Can I use my own agent?**
|
| 323 |
+
|
| 324 |
+
> Yes! The environment is Gymnasium-compatible. Use any RL framework (Stable Baselines3, RLlib, etc.) or hand-coded policies.
|
| 325 |
+
|
| 326 |
+
**Q6: What's the license?**
|
| 327 |
+
|
| 328 |
+
> MIT. Free to use, modify, and distribute.
|
| 329 |
+
|
| 330 |
+
---
|
| 331 |
+
|
| 332 |
+
## 🤝 Contributing
|
| 333 |
+
|
| 334 |
+
Found a bug? Have an idea for a harder task variant? Open an issue or PR!
|
| 335 |
+
|
| 336 |
+
---
|
| 337 |
+
|
| 338 |
+
## 📖 Citation
|
| 339 |
+
|
| 340 |
+
If you use AuditRepairEnv++ in your research, please cite:
|
| 341 |
+
|
| 342 |
+
```bibtex
|
| 343 |
+
@software{auditrepairenv2024,
|
| 344 |
+
title={AuditRepairEnv++: RL Environment for Cost-Constrained Iterative Ledger Repair},
|
| 345 |
+
author={Your Name},
|
| 346 |
+
year={2024},
|
| 347 |
+
url={https://github.com/your-repo/auditrepairenv-plus}
|
| 348 |
+
}
|
| 349 |
+
```
|
| 350 |
+
|
| 351 |
+
---
|
| 352 |
+
|
| 353 |
+
**Built with ❤️ for the AI community. Let's teach agents to be careful accountants.**
|
app.py
ADDED
|
@@ -0,0 +1,416 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Streamlit App for AuditRepairEnv++ Demo
|
| 3 |
+
Interactive demonstration of the RL environment for ledger repair
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
import streamlit as st
|
| 7 |
+
import numpy as np
|
| 8 |
+
from chronostasis import LedgerRepairEnv
|
| 9 |
+
import plotly.graph_objects as go
|
| 10 |
+
|
| 11 |
+
# Page config
|
| 12 |
+
st.set_page_config(
|
| 13 |
+
page_title="AuditRepairEnv++",
|
| 14 |
+
page_icon="🤖",
|
| 15 |
+
layout="wide",
|
| 16 |
+
initial_sidebar_state="expanded"
|
| 17 |
+
)
|
| 18 |
+
|
| 19 |
+
# Styling
|
| 20 |
+
st.markdown("""
|
| 21 |
+
<style>
|
| 22 |
+
.stTabs [data-baseurlpath] {color: #667eea;}
|
| 23 |
+
h1 {color: #667eea;}
|
| 24 |
+
h2 {color: #764ba2;}
|
| 25 |
+
</style>
|
| 26 |
+
""", unsafe_allow_html=True)
|
| 27 |
+
|
| 28 |
+
# Initialize session state
|
| 29 |
+
if 'env' not in st.session_state:
|
| 30 |
+
st.session_state.env = None
|
| 31 |
+
if 'episode_history' not in st.session_state:
|
| 32 |
+
st.session_state.episode_history = []
|
| 33 |
+
if 'current_obs' not in st.session_state:
|
| 34 |
+
st.session_state.current_obs = None
|
| 35 |
+
if 'current_info' not in st.session_state:
|
| 36 |
+
st.session_state.current_info = None
|
| 37 |
+
|
| 38 |
+
# Header
|
| 39 |
+
col1, col2 = st.columns([4, 1])
|
| 40 |
+
with col1:
|
| 41 |
+
st.title("🤖 AuditRepairEnv++")
|
| 42 |
+
st.markdown("**RL Environment for Cost-Constrained Iterative Ledger Repair**")
|
| 43 |
+
|
| 44 |
+
with col2:
|
| 45 |
+
st.metric("Version", "1.0.0")
|
| 46 |
+
|
| 47 |
+
st.markdown("""
|
| 48 |
+
Fix financial ledger errors while managing costs and avoiding cascading problems.
|
| 49 |
+
An interactive Reinforcement Learning environment for multi-step decision making.
|
| 50 |
+
""")
|
| 51 |
+
|
| 52 |
+
st.divider()
|
| 53 |
+
|
| 54 |
+
# Sidebar - Configuration
|
| 55 |
+
with st.sidebar:
|
| 56 |
+
st.header("⚙️ Configuration")
|
| 57 |
+
|
| 58 |
+
scenario = st.selectbox(
|
| 59 |
+
"Choose Scenario",
|
| 60 |
+
["Easy", "Medium", "Hard"],
|
| 61 |
+
help="Difficulty level affects complexity"
|
| 62 |
+
)
|
| 63 |
+
|
| 64 |
+
# Scenario presets
|
| 65 |
+
scenarios = {
|
| 66 |
+
"Easy": {
|
| 67 |
+
"num_transactions": 15,
|
| 68 |
+
"error_probability": 0.25,
|
| 69 |
+
"budget": 250.0,
|
| 70 |
+
"max_steps": 40
|
| 71 |
+
},
|
| 72 |
+
"Medium": {
|
| 73 |
+
"num_transactions": 25,
|
| 74 |
+
"error_probability": 0.35,
|
| 75 |
+
"budget": 200.0,
|
| 76 |
+
"max_steps": 50
|
| 77 |
+
},
|
| 78 |
+
"Hard": {
|
| 79 |
+
"num_transactions": 40,
|
| 80 |
+
"error_probability": 0.45,
|
| 81 |
+
"budget": 150.0,
|
| 82 |
+
"max_steps": 60
|
| 83 |
+
}
|
| 84 |
+
}
|
| 85 |
+
|
| 86 |
+
config = scenarios[scenario]
|
| 87 |
+
|
| 88 |
+
# Advanced options
|
| 89 |
+
with st.expander("🔧 Advanced Settings"):
|
| 90 |
+
config["num_transactions"] = st.slider(
|
| 91 |
+
"Transactions", 5, 100, config["num_transactions"]
|
| 92 |
+
)
|
| 93 |
+
config["error_probability"] = st.slider(
|
| 94 |
+
"Error Probability", 0.0, 1.0, config["error_probability"], 0.05
|
| 95 |
+
)
|
| 96 |
+
config["budget"] = st.slider(
|
| 97 |
+
"Budget ($)", 50.0, 500.0, config["budget"], 10.0
|
| 98 |
+
)
|
| 99 |
+
config["max_steps"] = st.slider(
|
| 100 |
+
"Max Steps", 10, 200, config["max_steps"], 10
|
| 101 |
+
)
|
| 102 |
+
|
| 103 |
+
st.divider()
|
| 104 |
+
st.subheader("📖 Help")
|
| 105 |
+
st.markdown("""
|
| 106 |
+
### Actions
|
| 107 |
+
- **Fix (0)**: Repair an error • Cost: $10
|
| 108 |
+
- **Revert (1)**: Undo last action • Cost: $5
|
| 109 |
+
- **Skip (2)**: Do nothing • Cost: $0
|
| 110 |
+
|
| 111 |
+
### Goal
|
| 112 |
+
Achieve 100% consistency while staying under budget.
|
| 113 |
+
""")
|
| 114 |
+
|
| 115 |
+
# Main content - Tabs
|
| 116 |
+
tab1, tab2, tab3, tab4 = st.tabs(
|
| 117 |
+
["🎮 Play", "📊 Metrics", "📋 Details", "ℹ️ About"]
|
| 118 |
+
)
|
| 119 |
+
|
| 120 |
+
with tab1:
|
| 121 |
+
st.header("Play the Game")
|
| 122 |
+
|
| 123 |
+
col1, col2, col3 = st.columns(3)
|
| 124 |
+
|
| 125 |
+
with col1:
|
| 126 |
+
if st.button("🔄 Reset Environment", key="reset_btn", use_container_width=True):
|
| 127 |
+
st.session_state.env = LedgerRepairEnv(**config)
|
| 128 |
+
obs, info = st.session_state.env.reset()
|
| 129 |
+
st.session_state.current_obs = obs
|
| 130 |
+
st.session_state.current_info = info
|
| 131 |
+
st.session_state.episode_history = [{
|
| 132 |
+
"step": 0,
|
| 133 |
+
"action": "RESET",
|
| 134 |
+
"reward": 0.0,
|
| 135 |
+
"cost": 0.0,
|
| 136 |
+
"errors": info['num_errors'],
|
| 137 |
+
"consistency": 0.0
|
| 138 |
+
}]
|
| 139 |
+
st.success("✅ Environment reset!")
|
| 140 |
+
st.rerun()
|
| 141 |
+
|
| 142 |
+
with col2:
|
| 143 |
+
st.write("") # Spacer
|
| 144 |
+
|
| 145 |
+
with col3:
|
| 146 |
+
st.write("") # Spacer
|
| 147 |
+
|
| 148 |
+
if st.session_state.env is None:
|
| 149 |
+
st.info("👈 Click 'Reset Environment' to start")
|
| 150 |
+
else:
|
| 151 |
+
env = st.session_state.env
|
| 152 |
+
obs = st.session_state.current_obs
|
| 153 |
+
info = st.session_state.current_info
|
| 154 |
+
|
| 155 |
+
# Current state display
|
| 156 |
+
col1, col2, col3, col4 = st.columns(4)
|
| 157 |
+
|
| 158 |
+
with col1:
|
| 159 |
+
st.metric(
|
| 160 |
+
"Errors Remaining",
|
| 161 |
+
info['num_errors'],
|
| 162 |
+
f"-{env.initial_error_count - info['num_errors']} {env.initial_error_count - info['num_errors'] != 1 and 's' or ''}",
|
| 163 |
+
delta_color="inverse"
|
| 164 |
+
)
|
| 165 |
+
|
| 166 |
+
with col2:
|
| 167 |
+
st.metric(
|
| 168 |
+
"Budget Remaining",
|
| 169 |
+
f"${info['budget_remaining']:.2f}",
|
| 170 |
+
f"spent: ${info['total_cost']:.2f}",
|
| 171 |
+
)
|
| 172 |
+
|
| 173 |
+
with col3:
|
| 174 |
+
consistency = (env.initial_error_count - info['num_errors']) / max(env.initial_error_count, 1) * 100
|
| 175 |
+
st.metric("Consistency", f"{consistency:.1f}%")
|
| 176 |
+
|
| 177 |
+
with col4:
|
| 178 |
+
st.metric("Step", info['step'])
|
| 179 |
+
|
| 180 |
+
st.divider()
|
| 181 |
+
|
| 182 |
+
# Action buttons
|
| 183 |
+
st.subheader("Choose Action:")
|
| 184 |
+
|
| 185 |
+
col1, col2, col3 = st.columns(3)
|
| 186 |
+
|
| 187 |
+
with col1:
|
| 188 |
+
if st.button("🔧 Fix Entry (Cost: $10)", use_container_width=True, key="fix"):
|
| 189 |
+
obs, reward, terminated, truncated, info = env.step(0)
|
| 190 |
+
st.session_state.current_obs = obs
|
| 191 |
+
st.session_state.current_info = info
|
| 192 |
+
|
| 193 |
+
st.session_state.episode_history.append({
|
| 194 |
+
"step": info['step'],
|
| 195 |
+
"action": "FIX",
|
| 196 |
+
"reward": reward,
|
| 197 |
+
"cost": info['total_cost'],
|
| 198 |
+
"errors": info['num_errors'],
|
| 199 |
+
"consistency": env.ledger.consistency_ratio() * 100
|
| 200 |
+
})
|
| 201 |
+
|
| 202 |
+
if terminated:
|
| 203 |
+
st.balloons()
|
| 204 |
+
st.success(f"🎉 Episode Complete! Final Score: {sum([h['reward'] for h in st.session_state.episode_history]):.2f}")
|
| 205 |
+
|
| 206 |
+
if truncated:
|
| 207 |
+
st.warning("⏱️ Max steps reached!")
|
| 208 |
+
|
| 209 |
+
st.rerun()
|
| 210 |
+
|
| 211 |
+
with col2:
|
| 212 |
+
if st.button("↩️ Revert (Cost: $5)", use_container_width=True, key="revert"):
|
| 213 |
+
obs, reward, terminated, truncated, info = env.step(1)
|
| 214 |
+
st.session_state.current_obs = obs
|
| 215 |
+
st.session_state.current_info = info
|
| 216 |
+
|
| 217 |
+
st.session_state.episode_history.append({
|
| 218 |
+
"step": info['step'],
|
| 219 |
+
"action": "REVERT",
|
| 220 |
+
"reward": reward,
|
| 221 |
+
"cost": info['total_cost'],
|
| 222 |
+
"errors": info['num_errors'],
|
| 223 |
+
"consistency": env.ledger.consistency_ratio() * 100
|
| 224 |
+
})
|
| 225 |
+
|
| 226 |
+
st.rerun()
|
| 227 |
+
|
| 228 |
+
with col3:
|
| 229 |
+
if st.button("⏯️ Skip (Cost: $0)", use_container_width=True, key="skip"):
|
| 230 |
+
obs, reward, terminated, truncated, info = env.step(2)
|
| 231 |
+
st.session_state.current_obs = obs
|
| 232 |
+
st.session_state.current_info = info
|
| 233 |
+
|
| 234 |
+
st.session_state.episode_history.append({
|
| 235 |
+
"step": info['step'],
|
| 236 |
+
"action": "SKIP",
|
| 237 |
+
"reward": reward,
|
| 238 |
+
"cost": info['total_cost'],
|
| 239 |
+
"errors": info['num_errors'],
|
| 240 |
+
"consistency": env.ledger.consistency_ratio() * 100
|
| 241 |
+
})
|
| 242 |
+
|
| 243 |
+
st.rerun()
|
| 244 |
+
|
| 245 |
+
st.divider()
|
| 246 |
+
|
| 247 |
+
# Remaining errors display
|
| 248 |
+
if info['num_errors'] > 0:
|
| 249 |
+
st.subheader("⚠️ Remaining Errors:")
|
| 250 |
+
error_list = list(env.ledger.errors.items())[:5]
|
| 251 |
+
for entry_id, error_desc in error_list:
|
| 252 |
+
st.warning(f"**Entry {entry_id}:** {error_desc}")
|
| 253 |
+
if len(env.ledger.errors) > 5:
|
| 254 |
+
st.info(f"... and {len(env.ledger.errors) - 5} more errors")
|
| 255 |
+
else:
|
| 256 |
+
st.success("✅ All errors fixed!")
|
| 257 |
+
|
| 258 |
+
with tab2:
|
| 259 |
+
st.header("📊 Episode Metrics")
|
| 260 |
+
|
| 261 |
+
if not st.session_state.episode_history or len(st.session_state.episode_history) <= 1:
|
| 262 |
+
st.info("👈 Play the game to see metrics")
|
| 263 |
+
else:
|
| 264 |
+
history = st.session_state.episode_history[1:] # Skip reset
|
| 265 |
+
|
| 266 |
+
# Charts
|
| 267 |
+
col1, col2 = st.columns(2)
|
| 268 |
+
|
| 269 |
+
with col1:
|
| 270 |
+
# Cumulative reward
|
| 271 |
+
steps = [h['step'] for h in history]
|
| 272 |
+
cumulative_rewards = np.cumsum([h['reward'] for h in history])
|
| 273 |
+
|
| 274 |
+
fig = go.Figure()
|
| 275 |
+
fig.add_trace(go.Scatter(
|
| 276 |
+
x=steps, y=cumulative_rewards,
|
| 277 |
+
mode='lines+markers',
|
| 278 |
+
name='Cumulative Reward',
|
| 279 |
+
line=dict(color='#667eea', width=2),
|
| 280 |
+
fill='tozeroy'
|
| 281 |
+
))
|
| 282 |
+
fig.update_layout(
|
| 283 |
+
title="Cumulative Reward",
|
| 284 |
+
xaxis_title="Step",
|
| 285 |
+
yaxis_title="Reward",
|
| 286 |
+
height=400,
|
| 287 |
+
template="plotly_white"
|
| 288 |
+
)
|
| 289 |
+
st.plotly_chart(fig, use_container_width=True)
|
| 290 |
+
|
| 291 |
+
with col2:
|
| 292 |
+
# Cost and consistency
|
| 293 |
+
costs = [h['cost'] for h in history]
|
| 294 |
+
consistency = [h['consistency'] for h in history]
|
| 295 |
+
|
| 296 |
+
fig = go.Figure()
|
| 297 |
+
fig.add_trace(go.Scatter(
|
| 298 |
+
x=steps, y=costs,
|
| 299 |
+
mode='lines+markers',
|
| 300 |
+
name='Total Cost',
|
| 301 |
+
line=dict(color='#ef4444', width=2),
|
| 302 |
+
yaxis='y'
|
| 303 |
+
))
|
| 304 |
+
fig.add_trace(go.Scatter(
|
| 305 |
+
x=steps, y=consistency,
|
| 306 |
+
mode='lines+markers',
|
| 307 |
+
name='Consistency %',
|
| 308 |
+
line=dict(color='#10b981', width=2),
|
| 309 |
+
yaxis='y2'
|
| 310 |
+
))
|
| 311 |
+
fig.update_layout(
|
| 312 |
+
title="Cost vs Consistency",
|
| 313 |
+
xaxis_title="Step",
|
| 314 |
+
yaxis=dict(title="Cost ($)", side='left'),
|
| 315 |
+
yaxis2=dict(title="Consistency (%)", side='right', overlaying='y'),
|
| 316 |
+
height=400,
|
| 317 |
+
template="plotly_white",
|
| 318 |
+
hovermode='x unified'
|
| 319 |
+
)
|
| 320 |
+
st.plotly_chart(fig, use_container_width=True)
|
| 321 |
+
|
| 322 |
+
# Statistics
|
| 323 |
+
st.divider()
|
| 324 |
+
st.subheader("📈 Statistics")
|
| 325 |
+
|
| 326 |
+
col1, col2, col3, col4 = st.columns(4)
|
| 327 |
+
|
| 328 |
+
with col1:
|
| 329 |
+
st.metric("Total Steps", len(history))
|
| 330 |
+
|
| 331 |
+
with col2:
|
| 332 |
+
total_reward = sum([h['reward'] for h in history])
|
| 333 |
+
st.metric("Total Reward", f"{total_reward:.2f}")
|
| 334 |
+
|
| 335 |
+
with col3:
|
| 336 |
+
final_cost = history[-1]['cost']
|
| 337 |
+
st.metric("Final Cost", f"${final_cost:.2f}")
|
| 338 |
+
|
| 339 |
+
with col4:
|
| 340 |
+
final_consistency = history[-1]['consistency']
|
| 341 |
+
st.metric("Final Consistency", f"{final_consistency:.1f}%")
|
| 342 |
+
|
| 343 |
+
with tab3:
|
| 344 |
+
st.header("📋 Episode History")
|
| 345 |
+
|
| 346 |
+
if not st.session_state.episode_history or len(st.session_state.episode_history) <= 1:
|
| 347 |
+
st.info("👈 Play the game to see history")
|
| 348 |
+
else:
|
| 349 |
+
import pandas as pd
|
| 350 |
+
|
| 351 |
+
history_df = pd.DataFrame(st.session_state.episode_history[1:])
|
| 352 |
+
st.dataframe(
|
| 353 |
+
history_df,
|
| 354 |
+
use_container_width=True,
|
| 355 |
+
hide_index=True,
|
| 356 |
+
column_config={
|
| 357 |
+
"step": st.column_config.NumberColumn("Step", format="%d"),
|
| 358 |
+
"action": st.column_config.TextColumn("Action"),
|
| 359 |
+
"reward": st.column_config.NumberColumn("Reward", format="%.2f"),
|
| 360 |
+
"cost": st.column_config.NumberColumn("Cost", format="$%.2f"),
|
| 361 |
+
"errors": st.column_config.NumberColumn("Errors", format="%d"),
|
| 362 |
+
"consistency": st.column_config.NumberColumn("Consistency", format="%.1f%%"),
|
| 363 |
+
}
|
| 364 |
+
)
|
| 365 |
+
|
| 366 |
+
with tab4:
|
| 367 |
+
st.header("ℹ️ About AuditRepairEnv++")
|
| 368 |
+
|
| 369 |
+
st.markdown("""
|
| 370 |
+
### 🎯 What is This?
|
| 371 |
+
|
| 372 |
+
AuditRepairEnv++ is an OpenAI Gymnasium-compatible RL environment where agents must
|
| 373 |
+
iteratively repair inconsistencies in a financial ledger while:
|
| 374 |
+
|
| 375 |
+
- **Managing Costs**: Each action has a monetary cost
|
| 376 |
+
- **Avoiding Cascade Errors**: Fixing one error can introduce new errors
|
| 377 |
+
- **Meeting Constraints**: Stay within a budget while maximizing consistency
|
| 378 |
+
|
| 379 |
+
### 🤖 Real-World Applications
|
| 380 |
+
|
| 381 |
+
- Financial reconciliation systems
|
| 382 |
+
- Audit ledger repair
|
| 383 |
+
- Transaction correction in payment systems
|
| 384 |
+
- Data cleaning and consistency checking
|
| 385 |
+
|
| 386 |
+
### 📊 Environment Metrics
|
| 387 |
+
|
| 388 |
+
Your performance is evaluated on:
|
| 389 |
+
|
| 390 |
+
1. **Consistency (40%)**: How many errors you fix
|
| 391 |
+
2. **Cost Efficiency (35%)**: How well you stay under budget
|
| 392 |
+
3. **Action Efficiency (15%)**: How few actions you take
|
| 393 |
+
4. **Stability (10%)**: How few overcorrections you make
|
| 394 |
+
|
| 395 |
+
### 🚀 Try Different Scenarios
|
| 396 |
+
|
| 397 |
+
- **Easy**: Simple ledgers with fewer errors
|
| 398 |
+
- **Medium**: Complex patterns with cascading effects
|
| 399 |
+
- **Hard**: Large-scale problems with hidden dependencies
|
| 400 |
+
|
| 401 |
+
### 📚 Learn More
|
| 402 |
+
|
| 403 |
+
- [GitHub Repository](https://github.com/your-repo/auditrepairenv-plus)
|
| 404 |
+
- [OpenAPI Docs](/docs)
|
| 405 |
+
- [Gymnasium Framework](https://gymnasium.farama.org/)
|
| 406 |
+
|
| 407 |
+
### 💡 Tips for Success
|
| 408 |
+
|
| 409 |
+
1. Start with **Easy** difficulty
|
| 410 |
+
2. Watch for **cascading errors** (fixing one can break another)
|
| 411 |
+
3. Balance **speed** with **cost**
|
| 412 |
+
4. Use **Revert** strategically when mistakes happen
|
| 413 |
+
""")
|
| 414 |
+
|
| 415 |
+
st.divider()
|
| 416 |
+
st.markdown("**Built with ❤️ for the AI community** | v1.0.0")
|
chronostasis/__init__.py
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
AuditRepairEnv++ — RL Environment for Cost-Constrained Iterative Ledger Repair
|
| 3 |
+
|
| 4 |
+
A gymnasium-compatible RL environment for training agents to iteratively repair
|
| 5 |
+
financial ledgers while managing costs and avoiding cascading errors.
|
| 6 |
+
|
| 7 |
+
Example:
|
| 8 |
+
>>> from chronostasis import LedgerRepairEnv
|
| 9 |
+
>>> env = LedgerRepairEnv(num_transactions=20, budget=200.0)
|
| 10 |
+
>>> obs, info = env.reset()
|
| 11 |
+
>>> obs, reward, terminated, truncated, info = env.step(0)
|
| 12 |
+
"""
|
| 13 |
+
|
| 14 |
+
from chronostasis.ledger_repair_env import (
|
| 15 |
+
LedgerRepairEnv,
|
| 16 |
+
Ledger,
|
| 17 |
+
LedgerState,
|
| 18 |
+
Transaction,
|
| 19 |
+
)
|
| 20 |
+
|
| 21 |
+
__version__ = "1.0.0"
|
| 22 |
+
__all__ = [
|
| 23 |
+
"LedgerRepairEnv",
|
| 24 |
+
"Ledger",
|
| 25 |
+
"LedgerState",
|
| 26 |
+
"Transaction",
|
| 27 |
+
]
|
chronostasis/ledger_repair_env.py
ADDED
|
@@ -0,0 +1,399 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
LedgerRepairEnv — RL Environment for Cost-Constrained Iterative Ledger Repair
|
| 3 |
+
|
| 4 |
+
This module provides a gymnasium-compatible RL environment where an agent
|
| 5 |
+
must iteratively repair inconsistencies in a financial ledger while:
|
| 6 |
+
- Managing a limited budget (each action costs money)
|
| 7 |
+
- Avoiding cascading errors (fixing one entry can introduce new errors)
|
| 8 |
+
- Minimizing the number of actions taken
|
| 9 |
+
"""
|
| 10 |
+
|
| 11 |
+
import numpy as np
|
| 12 |
+
import gymnasium as gym
|
| 13 |
+
from gymnasium import spaces
|
| 14 |
+
from typing import Dict, List, Tuple, Any, Optional
|
| 15 |
+
from dataclasses import dataclass, field
|
| 16 |
+
import json
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
@dataclass
class Transaction:
    """One transfer recorded in the ledger.

    The first five fields are required and describe the transfer itself; the
    remaining fields carry corruption bookkeeping and default to a clean,
    dependency-free entry.
    """

    # Identifier of this entry within the ledger.
    entry_id: int
    # Account the transfer is taken from.
    source_account: str
    # Account the transfer is paid into.
    dest_account: str
    # Amount moved from source to destination.
    amount: float
    # Ordering key of the entry.
    timestamp: int
    # True while the entry is flagged as corrupted.
    is_corrupted: bool = False
    # Kind of corruption, e.g. 'amount_mismatch' or 'missing_inverse'; None if clean.
    error_type: Optional[str] = None
    # Ids of the entries this one depends on (a fresh list per instance).
    dependencies: List[int] = field(default_factory=list)
|
| 31 |
+
|
| 32 |
+
|
| 33 |
+
@dataclass
class LedgerState:
    """Snapshot of a ledger together with running cost/action counters."""

    transactions: Dict[int, Transaction]
    balances: Dict[str, float]
    errors: Dict[int, str]  # entry_id -> error description
    total_cost: float = 0.0
    actions_taken: int = 0
    history: List[Dict[str, Any]] = field(default_factory=list)

    def to_array(self) -> np.ndarray:
        """Encode the snapshot as a 4-element float32 feature vector.

        Returns:
            ``[error_ratio, total_cost, actions_taken, num_transactions]``,
            where ``error_ratio`` is the number of errors divided by the
            number of transactions.
        """
        txn_count = len(self.transactions)
        # The denominator is clamped to 1 so an empty ledger yields ratio 0
        # instead of dividing by zero.
        error_ratio = len(self.errors) / max(txn_count, 1)

        return np.array(
            [error_ratio, self.total_cost, self.actions_taken, txn_count],
            dtype=np.float32,
        )
|
| 59 |
+
|
| 60 |
+
|
| 61 |
+
class Ledger:
    """Manages ledger state, transactions, and error detection.

    On construction the ledger is generated randomly: four accounts start at
    a balance of 1000.0, ``num_transactions`` transfers are applied, and a
    subset of the entries is then corrupted. All random draws use NumPy's
    global random state (``np.random``).
    """

    FIX_COST = 10.0  # Cost per fix action
    REVERT_COST = 5.0  # Cost per revert
    SKIP_COST = 0.0  # No cost for skip

    def __init__(self, num_transactions: int = 20, error_probability: float = 0.3):
        """
        Initialize a ledger with random transactions and errors.

        Args:
            num_transactions: Number of transactions to generate
            error_probability: Fraction of the transactions that receive a
                primary error (at least one entry is corrupted whenever the
                ledger is non-empty)
        """
        self.num_transactions = num_transactions
        self.error_probability = error_probability
        self.transactions: Dict[int, Transaction] = {}
        self.balances: Dict[str, float] = {}
        self.original_balances: Dict[str, float] = {}  # Balances before corruption
        self.errors: Dict[int, str] = {}
        self.original_errors: Dict[int, str] = {}  # For tracking baseline errors
        self.fix_history: List[Dict[str, Any]] = []
        # entry_id -> amount that was added to the source balance when the
        # entry was corrupted; needed to undo exactly that perturbation.
        self._balance_deltas: Dict[int, float] = {}

        self._initialize_ledger()

    def _initialize_ledger(self) -> None:
        """Generate initial ledger with transactions and induced errors."""
        accounts = ["account_A", "account_B", "account_C", "account_D"]

        # Initialize balances
        for acc in accounts:
            self.balances[acc] = 1000.0

        # Create transactions
        for i in range(self.num_transactions):
            # Convert NumPy scalars to builtins so stored values are plain
            # str/float objects.
            src = str(np.random.choice(accounts))
            dst = str(np.random.choice([a for a in accounts if a != src]))
            amount = float(np.random.uniform(10, 100))

            self.transactions[i] = Transaction(
                entry_id=i,
                source_account=src,
                dest_account=dst,
                amount=amount,
                timestamp=i,
                is_corrupted=False,
                dependencies=[],
            )
            self.balances[src] -= amount
            self.balances[dst] += amount

        # Store the consistent balances before any corruption is applied
        self.original_balances = dict(self.balances)

        # Introduce errors
        self._introduce_errors()

    def _introduce_errors(self) -> None:
        """Corrupt a random subset of entries, optionally cascading to a neighbour."""
        if self.num_transactions <= 0:
            # Nothing to corrupt; sampling from an empty population would raise.
            self.original_errors = {}
            return

        # At least one error, but never more than there are entries (sampling
        # without replacement cannot draw more items than the population).
        requested = int(self.num_transactions * self.error_probability)
        num_errors = min(self.num_transactions, max(1, requested))
        error_indices = np.random.choice(
            self.num_transactions,
            size=num_errors,
            replace=False,
        )

        for raw_idx in error_indices:
            idx = int(raw_idx)  # plain int keys instead of np.int64
            error_type = str(np.random.choice(["amount_mismatch", "missing_inverse"]))
            txn = self.transactions[idx]

            if error_type == "amount_mismatch":
                # Corrupt the amount
                corrupted_amount = txn.amount * np.random.uniform(0.5, 1.5)
                diff = corrupted_amount - txn.amount

                # Introduce balance inconsistency and remember its size so
                # fix_entry can undo exactly this perturbation.
                self.balances[txn.source_account] += diff
                self._balance_deltas[idx] = diff
                self.errors[idx] = f"amount_mismatch: {txn.amount} vs {corrupted_amount}"
                txn.is_corrupted = True
                txn.error_type = "amount_mismatch"

                # Cascade: mark the following entry as dependent
                if np.random.random() < 0.4:
                    dependent_idx = (idx + 1) % self.num_transactions
                    if dependent_idx != idx:
                        self.transactions[dependent_idx].dependencies.append(idx)
                        # Do not clobber a primary error already recorded
                        # for the dependent entry.
                        self.errors.setdefault(
                            dependent_idx,
                            "cascaded_error: depends on entry_" + str(idx),
                        )

            else:  # missing_inverse
                self.errors[idx] = "missing_inverse: no matching reverse transaction"
                txn.is_corrupted = True
                txn.error_type = "missing_inverse"

        self.original_errors = dict(self.errors)

    def fix_entry(self, entry_id: int) -> Tuple[bool, str]:
        """
        Attempt to fix an entry.

        Clears the entry's error, undoes the balance perturbation of an
        amount mismatch, and records the fix in ``fix_history`` so that it
        can be reverted.

        Returns:
            (success, message)
        """
        if entry_id not in self.transactions:
            return False, f"Entry {entry_id} not found"

        if entry_id not in self.errors:
            return False, f"Entry {entry_id} has no errors"

        txn = self.transactions[entry_id]
        error_message = self.errors.pop(entry_id)

        balance_delta = 0.0
        if txn.error_type == "amount_mismatch":
            # Remove exactly the amount that corruption added to the balance.
            balance_delta = self._balance_deltas.pop(entry_id, 0.0)
            self.balances[txn.source_account] -= balance_delta

        was_corrupted = txn.is_corrupted
        txn.is_corrupted = False

        # Every successful fix is recorded (not only amount mismatches);
        # the extra keys hold what revert_action needs to restore.
        self.fix_history.append({
            "action": "fix",
            "entry": entry_id,
            # Entries that only carry a cascaded error have no error_type.
            "type": txn.error_type or "cascaded_error",
            "cost": self.FIX_COST,
            "error": error_message,
            "balance_delta": balance_delta,
            "was_corrupted": was_corrupted,
        })

        return True, f"Fixed entry {entry_id}"

    def revert_action(self, last_action_idx: int) -> Tuple[bool, str]:
        """
        Revert the most recent recorded fix.

        Restores the error description, the balance perturbation and the
        corruption flag of the entry that was fixed last.

        Args:
            last_action_idx: Unused; the most recent entry of
                ``fix_history`` is always the one reverted. Kept so existing
                callers keep working.

        Returns:
            (success, message)
        """
        if not self.fix_history:
            return False, "No actions to revert"

        record = self.fix_history.pop()
        entry_id = record.get("entry")
        txn = self.transactions.get(entry_id)
        error_message = record.get("error")

        # Records appended by other code may lack the restore data; those
        # are simply dropped from the history.
        if txn is not None and error_message is not None:
            self.errors[entry_id] = error_message
            balance_delta = record.get("balance_delta", 0.0)
            if balance_delta:
                self.balances[txn.source_account] += balance_delta
                self._balance_deltas[entry_id] = balance_delta
            txn.is_corrupted = record.get("was_corrupted", txn.is_corrupted)

        return True, "Action reverted"

    def get_state(self) -> LedgerState:
        """Get current ledger state.

        Balances, errors and history are shallow copies; ``transactions`` is
        the live dictionary. Cost and action counters keep their defaults.
        """
        return LedgerState(
            transactions=self.transactions,
            balances=dict(self.balances),
            errors=dict(self.errors),
            history=self.fix_history.copy(),
        )

    def is_valid(self) -> bool:
        """Check if ledger is valid (no errors)."""
        return not self.errors

    def consistency_ratio(self) -> float:
        """Return ratio of consistent entries (0.0 to 1.0)."""
        if self.num_transactions == 0:
            return 1.0
        return (self.num_transactions - len(self.errors)) / self.num_transactions
|
| 213 |
+
|
| 214 |
+
|
| 215 |
+
class LedgerRepairEnv(gym.Env):
    """
    RL Environment for iteratively repairing a corrupted ledger.

    Action Space (``Discrete(3)``):
        0: Fix an outstanding error (costs ``Ledger.FIX_COST``)
        1: Revert the last recorded fix (costs ``Ledger.REVERT_COST``)
        2: Skip (no cost); any other value is handled like a skip

    Observation Space:
        4-dim float32 vector: [error_ratio, total_cost, actions_taken, num_transactions]

    Reward:
        - +10 for a successful fix, -20 if that fix pushes the cost over budget
        - -5 for a fix when no errors remain
        - -3 for a revert, -2 for a failed revert, -1 for a skip
        - Terminal: once no errors remain, +50 if within budget, minus 0.5
          per action taken
    """

    def __init__(
        self,
        num_transactions: int = 20,
        error_probability: float = 0.3,
        budget: float = 200.0,
        max_steps: int = 50,
    ):
        """
        Initialize the environment.

        Args:
            num_transactions: Number of transactions in the ledger
            error_probability: Fraction of transactions that receive an error
            budget: Maximum cost budget for repairs
            max_steps: Maximum number of steps per episode
        """
        super().__init__()

        self.num_transactions = num_transactions
        self.error_probability = error_probability
        self.budget = budget
        self.max_steps = max_steps

        # Initialize ledger
        self.ledger = Ledger(num_transactions, error_probability)
        self.initial_error_count = len(self.ledger.errors)

        # Discrete actions: 0=fix next error, 1=revert, 2=skip
        self.action_space = spaces.Discrete(3)

        # Observation space: [error_ratio, cost, actions, num_transactions]
        self.observation_space = spaces.Box(
            low=0.0,
            high=1e6,
            shape=(4,),
            dtype=np.float32,
        )

        self.current_step = 0
        self.total_cost = 0.0
        self.actions_list: List[int] = []
        self.current_error_idx = 0  # Track which error to fix next

    def reset(
        self,
        seed: Optional[int] = None,
        options: Optional[Dict[str, Any]] = None,
    ) -> Tuple[np.ndarray, Dict[str, Any]]:
        """
        Reset environment to initial state with a freshly generated ledger.

        Args:
            seed: Optional seed; when given, ledger generation is reproducible
            options: Accepted for Gymnasium API compatibility; currently unused

        Returns:
            (observation, info)
        """
        super().reset(seed=seed)

        if seed is not None:
            # Ledger draws from NumPy's global random state rather than
            # self.np_random, so that state must be seeded for `seed` to
            # have any effect. np.random.seed only accepts 32-bit values.
            np.random.seed(seed % (2 ** 32))

        self.ledger = Ledger(self.num_transactions, self.error_probability)
        self.initial_error_count = len(self.ledger.errors)
        self.current_step = 0
        self.total_cost = 0.0
        self.actions_list = []
        self.current_error_idx = 0

        obs = self._get_observation()
        info = self._get_info()

        return obs, info

    def step(self, action: int) -> Tuple[np.ndarray, float, bool, bool, Dict[str, Any]]:
        """
        Execute one step of the environment.

        Args:
            action: 0=fix, 1=revert, 2=skip

        Returns:
            (observation, reward, terminated, truncated, info)
        """
        self.current_step += 1
        reward = 0.0
        terminated = False
        truncated = self.current_step >= self.max_steps

        info: Dict[str, Any] = {}

        # Ids of the entries that currently carry an error
        error_ids = list(self.ledger.errors.keys())

        if action == 0:  # Fix
            if error_ids:
                # The cursor keeps advancing while the error list shrinks,
                # so wrap it around the current list length.
                error_to_fix = error_ids[self.current_error_idx % len(error_ids)]
                success, message = self.ledger.fix_entry(error_to_fix)

                if success:
                    self.total_cost += Ledger.FIX_COST
                    reward += 10.0  # Reward for fixing
                    self.actions_list.append(0)
                    self.current_error_idx += 1

                    # Penalty if cost exceeds budget
                    if self.total_cost > self.budget:
                        reward -= 20.0

                info["action"] = "fix"
                info["message"] = message
            else:
                reward -= 5.0  # Penalty for failed action
                info["action"] = "fix_failed"

        elif action == 1:  # Revert
            success, message = self.ledger.revert_action(len(self.actions_list) - 1)
            if success:
                self.total_cost += Ledger.REVERT_COST
                reward -= 3.0  # Small penalty for reverting
                self.actions_list.append(1)
                info["action"] = "revert"
            else:
                reward -= 2.0
                info["action"] = "revert_failed"

        else:  # Skip
            reward -= 1.0  # Small penalty for doing nothing
            self.actions_list.append(2)
            info["action"] = "skip"

        # Check termination conditions
        if self.ledger.is_valid():
            terminated = True
            # Bonus for completing under budget
            if self.total_cost <= self.budget:
                reward += 50.0
            # Penalty for using too many actions
            reward -= len(self.actions_list) * 0.5
            info["success"] = True
            info["consistency_ratio"] = 1.0
        else:
            info["success"] = False
            info["consistency_ratio"] = self.ledger.consistency_ratio()

        obs = self._get_observation()
        info.update(self._get_info())

        return obs, reward, terminated, truncated, info

    def _get_observation(self) -> np.ndarray:
        """Get current observation."""
        state = self.ledger.get_state()
        # Cost and action count are tracked by the environment, not by the
        # ledger; without copying them over, those two observation slots
        # would always read 0.
        state.total_cost = self.total_cost
        state.actions_taken = len(self.actions_list)
        return state.to_array()

    def _get_info(self) -> Dict[str, Any]:
        """Get info dict."""
        return {
            "total_cost": self.total_cost,
            "budget_remaining": self.budget - self.total_cost,
            "num_errors": len(self.ledger.errors),
            "initial_errors": self.initial_error_count,
            "actions_taken": len(self.actions_list),
            "step": self.current_step,
        }

    def render(self) -> None:
        """Print a text summary of the current state (shows up to 5 errors)."""
        print("\n" + "=" * 60)
        print(f"Step: {self.current_step}")
        print(f"Budget: ${self.budget:.2f} | Spent: ${self.total_cost:.2f}")
        print(f"Errors Remaining: {len(self.ledger.errors)}/{self.initial_error_count}")
        print(f"Consistency: {self.ledger.consistency_ratio() * 100:.1f}%")
        print("=" * 60)

        if self.ledger.errors:
            print("Remaining Errors:")
            for eid, err in list(self.ledger.errors.items())[:5]:
                print(f"  Entry {eid}: {err}")
        else:
            print("✓ Ledger is fully consistent!")
|