# Manifest metadata: environment name, version, summary, authorship, and license.
name: invoice_processing_pipeline
# Version is written as an explicitly quoted string.
version: "1.0.0"
# Folded block scalar (`>`): the wrapped lines below are joined with single
# spaces into one paragraph. Do not put comments inside the indented body;
# they would become part of the description text.
# The "10 tasks" figure matches the ten entries under `tasks:` in this file;
# keep the two in sync when tasks are added or removed.
description: >
  A self-improving 5-agent adversarial RL environment for invoice fraud detection.
  A cross-episode Regulator monitors the Auditor's blind spots and biases the Generator
  to produce harder fraud — closing a self-improvement loop without human intervention.
  10 tasks from easy extraction to 20-step long-horizon investigations and adaptive
  personalized curricula.

author: "Pritam Satpathy & Gnana Nawin T"
license: "MIT"

# Free-form keyword list describing the environment.
# NOTE(review): presumably used for discovery/search by the hosting platform — confirm.
tags:
  - openenv
  - invoice
  - fraud-detection
  - multi-agent
  - self-improvement
  - grpo
  - finance
  - curriculum

# Names the environment implementation and its action/observation models.
# NOTE(review): the dotted values look like import paths (`server.app`,
# `models.*`) resolved by the consuming loader — confirm how they are resolved.
environment:
  module: server.app
  class: InvoiceEnvironment
  action: models.InvoiceAction
  observation: models.InvoiceObservation

# Task catalogue. Every entry carries the same four keys:
#   id, name, description, difficulty.
# Difficulty values used in this file: easy, medium, hard, expert, adaptive.
# The first four ids below (easy, medium, hard, expert) are the stages that the
# `curriculum` task's description says it progresses through.
tasks:
  # Stage 1: field extraction from a single invoice.
  - id: easy
    name: "Single Invoice Extraction"
    description: "Extract structured fields (vendor, date, currency, total, line items) from a single invoice."
    difficulty: easy

  # Stage 2: normalisation of a batch of messy invoices.
  - id: medium
    name: "Batch Invoice Cleaning"
    description: "Clean and normalise a batch of messy invoices: fix dates, vendor typos, currency codes, and amounts."
    difficulty: medium

  # Stage 3: reconciliation of invoices against purchase orders.
  - id: hard
    name: "Invoice-PO Reconciliation"
    description: "Extract, clean, and reconcile invoices against purchase orders. Flag overcharges, extra items, and missing items."
    difficulty: hard

  # Stage 4: fraud detection with four named fraud classes (see description).
  - id: expert
    name: "Invoice Fraud Audit"
    description: "Detect fraudulent invoices using approved vendor registry, market price catalog, and invoice history. Classify fraud type: phantom_vendor, price_gouging, duplicate_submission, or math_fraud."
    difficulty: expert

  # Specialised tasks. Their difficulty labels reuse the same tiers as the
  # easy/medium/hard/expert entries (adversarial is rated `hard`, negotiate
  # `medium`, supply_chain and long_horizon `expert`).

  # Extraction from a deliberately corrupted invoice; per the description the
  # TOTAL line is the one trustworthy figure.
  - id: adversarial
    name: "Adversarial Invoice Extraction"
    description: "Extract from an invoice with OCR corruption (0→O, 1→l, 5→S), a misleading SUBTOTAL trap, fabricated TAX/ADJUSTMENT lines, and a multi-currency FX noise line. The TOTAL line is always correct."
    difficulty: hard

  # Question-then-extract task; the description's `{question: str}` is literal
  # text inside a quoted string, not a YAML flow mapping.
  - id: negotiate
    name: "Negotiated Invoice Clarification"
    description: "Ask clarification questions (submit {question: str}) about an ambiguous invoice, then submit full structured extraction. Bonus awarded for solving correctly with ≤2 questions."
    difficulty: medium

  # Anomaly detection over supply chain delivery records rather than invoices.
  - id: supply_chain
    name: "Supply Chain Anomaly Detection"
    description: "Identify quantity shortfalls, price spikes, unauthorized substitutions, and phantom deliveries in a set of supply chain delivery records."
    difficulty: expert

  # Multi-phase task: the description lists four phases over 20 steps, each
  # gating the next phase's reference data.
  - id: long_horizon
    name: "Long-Horizon Financial Investigation"
    description: "20-step, 4-phase investigation with sparse rewards. Phase 1: extract 3 invoices. Phase 2: reconcile against POs (unlocked). Phase 3: fraud audit (registry unlocked). Phase 4: risk forecast. Each phase completion required to unlock next phase's reference data."
    difficulty: expert

  # Adaptive tasks: both use `difficulty: adaptive` instead of a fixed tier.

  # Targets the agent's weakest tracked field (vendor, date, math, completeness).
  - id: personalized
    name: "Personalized Adaptive Task"
    description: "Tracks per-field accuracy (vendor, date, math, completeness) across steps and generates the next invoice to target the agent's weakest field. Reward weighted toward the historically weakest category."
    difficulty: adaptive

  # Walks the easy→medium→hard→expert stages; the description states the score
  # thresholds (≥0.80 to advance, <0.40 to be held back) and a 20-step cap.
  - id: curriculum
    name: "Auto-Progressive Curriculum"
    description: "Automatically progresses the agent through easy→medium→hard→expert based on score. Score ≥0.80 to advance to next stage. Score <0.40 to be held back. Up to 20 steps across all stages."
    difficulty: adaptive

# Core routes, keyed by purpose. Values are paths only (no scheme, host, or port).
# NOTE(review): HTTP methods and payload shapes are not declared here — see the
# server implementation for those.
endpoints:
  reset: /reset
  step: /step
  state: /state
  health: /health
  grader: /grader
  tasks: /tasks
  metrics: /metrics
  websocket: /ws

# Routes for the multi-agent flow, all under the `/multi` prefix.
# `{episode_id}` in `multi_state` is a path placeholder; because the scalar
# starts with `/`, the braces are plain text and not a YAML flow mapping.
multi_agent_endpoints:
  multi_reset: /multi/reset
  multi_extract: /multi/extract
  multi_audit: /multi/audit
  multi_approve: /multi/approve
  multi_state: /multi/state/{episode_id}

# Routes grouped under the Regulator.
# NOTE(review): `generator_score` points at `/generator/score`, unlike the other
# entries which share the `/regulator` prefix — confirm this grouping is intended.
regulator_endpoints:
  report: /regulator/report
  forecast: /regulator/forecast
  calibration: /regulator/calibration
  predict: /regulator/predict
  generator_score: /generator/score

# Paths of the human-facing interfaces: a Gradio page and the Swagger docs.
ui:
  gradio: /web
  swagger: /docs