# ps2181's picture
# Update openenv.yaml
# 54a7c04 verified
# Manifest metadata: identifier, version, summary, authorship, and license.
name: invoice_processing_pipeline
# Quoted so the version stays a string rather than being parsed as a number.
version: "1.0.0"
# Folded scalar (>): the indented lines below are joined with single spaces
# into one paragraph; the body must be indented deeper than the key.
description: >
  A self-improving 5-agent adversarial RL environment for invoice fraud detection.
  A cross-episode Regulator monitors the Auditor's blind spots and biases the Generator
  to produce harder fraud — closing a self-improvement loop without human intervention.
  10 tasks from easy extraction to 20-step long-horizon investigations and adaptive
  personalized curricula.
author: "Pritam Satpathy & Gnana Nawin T"
license: "MIT"
# Free-form labels attached to this environment.
tags:
  - openenv
  - invoice
  - fraud-detection
  - multi-agent
  - self-improvement
  - grpo
  - finance
  - curriculum
# Dotted references to the environment implementation and its action /
# observation models. NOTE(review): presumably resolved as Python import
# paths by the consumer — confirm against the loader.
environment:
  module: server.app
  class: InvoiceEnvironment
  action: models.InvoiceAction
  observation: models.InvoiceObservation
# Task catalogue (10 entries). Each entry is a mapping with:
#   id          - short identifier for the task
#   name        - human-readable title
#   description - what the agent must do
#   difficulty  - label; values used here: easy, medium, hard, expert, adaptive
tasks:
  - id: easy
    name: "Single Invoice Extraction"
    description: "Extract structured fields (vendor, date, currency, total, line items) from a single invoice."
    difficulty: easy
  - id: medium
    name: "Batch Invoice Cleaning"
    description: "Clean and normalise a batch of messy invoices: fix dates, vendor typos, currency codes, and amounts."
    difficulty: medium
  - id: hard
    name: "Invoice-PO Reconciliation"
    description: "Extract, clean, and reconcile invoices against purchase orders. Flag overcharges, extra items, and missing items."
    difficulty: hard
  - id: expert
    name: "Invoice Fraud Audit"
    description: "Detect fraudulent invoices using approved vendor registry, market price catalog, and invoice history. Classify fraud type: phantom_vendor, price_gouging, duplicate_submission, or math_fraud."
    difficulty: expert
  - id: adversarial
    name: "Adversarial Invoice Extraction"
    description: "Extract from an invoice with OCR corruption (0→O, 1→l, 5→S), a misleading SUBTOTAL trap, fabricated TAX/ADJUSTMENT lines, and a multi-currency FX noise line. The TOTAL line is always correct."
    difficulty: hard
  - id: negotiate
    name: "Negotiated Invoice Clarification"
    description: "Ask clarification questions (submit {question: str}) about an ambiguous invoice, then submit full structured extraction. Bonus awarded for solving correctly with ≤2 questions."
    difficulty: medium
  - id: supply_chain
    name: "Supply Chain Anomaly Detection"
    description: "Identify quantity shortfalls, price spikes, unauthorized substitutions, and phantom deliveries in a set of supply chain delivery records."
    difficulty: expert
  - id: long_horizon
    name: "Long-Horizon Financial Investigation"
    description: "20-step, 4-phase investigation with sparse rewards. Phase 1: extract 3 invoices. Phase 2: reconcile against POs (unlocked). Phase 3: fraud audit (registry unlocked). Phase 4: risk forecast. Each phase completion required to unlock next phase's reference data."
    difficulty: expert
  - id: personalized
    name: "Personalized Adaptive Task"
    description: "Tracks per-field accuracy (vendor, date, math, completeness) across steps and generates the next invoice to target the agent's weakest field. Reward weighted toward the historically weakest category."
    difficulty: adaptive
  - id: curriculum
    name: "Auto-Progressive Curriculum"
    description: "Automatically progresses the agent through easy→medium→hard→expert based on score. Score ≥0.80 to advance to next stage. Score <0.40 to be held back. Up to 20 steps across all stages."
    difficulty: adaptive
# Route paths for the core environment API, keyed by operation name.
endpoints:
  reset: /reset
  step: /step
  state: /state
  health: /health
  grader: /grader
  tasks: /tasks
  metrics: /metrics
  websocket: /ws
# Route paths for the multi-agent flow, all under the /multi prefix.
multi_agent_endpoints:
  multi_reset: /multi/reset
  multi_extract: /multi/extract
  multi_audit: /multi/audit
  multi_approve: /multi/approve
  # Quoted so the {episode_id} placeholder braces are never read as YAML
  # flow-mapping syntax; the string value is unchanged.
  multi_state: "/multi/state/{episode_id}"
# Route paths for the Regulator reports and predictions.
regulator_endpoints:
  report: /regulator/report
  forecast: /regulator/forecast
  calibration: /regulator/calibration
  predict: /regulator/predict
  # NOTE(review): this route is under /generator, not /regulator. It is kept
  # in this map based on its position in the file — confirm the nesting.
  generator_score: /generator/score
# Paths of the browser-facing interfaces (Gradio app and Swagger docs).
ui:
  gradio: /web
  swagger: /docs