# Package metadata: identifier, version, summary, authorship, and license.
name: context-corruption-env
# Kept as a quoted string.
version: '1.0.0'
# Folded scalar: the line breaks below are joined with single spaces when
# parsed, and one trailing newline is kept.
description: >
  OpenEnv environment for training epistemic robustness
  in LLMs. Agents identify correct answers and flag
  corrupted documents in a multi-doc QA setting with
  verifiable, objective rewards.
author: 'Siddh Sanghavi, Aagam Parekh'
license: MIT

# Server and schema wiring for the environment.
environment:
  # NOTE(review): looks like a `module:attribute` reference to the server
  # application object - confirm against whatever loads this file.
  entrypoint: 'environment.server:app'
  # Dotted names of the action and observation schemas; presumably classes
  # in `environment.actions` - verify against that module.
  action_schema: 'environment.actions.ContextCorruptionAction'
  observation_schema: 'environment.actions.EpisodeObservation'
  # Integer session cap; how it is enforced is not visible in this file.
  max_concurrent_sessions: 64

# Reward specification: type, numeric range, and weighted components.
reward:
  type: 'objective'
  # Two-element list; presumably [min, max] of the total reward - confirm.
  # NOTE(review): the component weights below sum to 1.0 while the upper
  # bound here is 1.05 - verify against the reward implementation.
  range: [-0.5, 1.05]
  # Each entry names one reward component and its weight.
  components:
    - name: answer_correctness
      weight: 0.4
    - name: corruption_detection
      weight: 0.3
    - name: false_positive_penalty
      weight: 0.2
    - name: confidence_calibration
      weight: 0.1

# Datasets referenced by this environment: a display name and a URL each.
datasets:
  - name: Natural Questions
    url: 'https://huggingface.co/datasets/google-research-datasets/natural_questions'
  - name: PopQA
    url: 'https://huggingface.co/datasets/akariasai/PopQA'
  - name: FaithEval
    url: 'https://github.com/SalesforceAIResearch/FaithEval'

# BibTeX entry for citing this environment. Stored as a literal block scalar
# (`|`), so the line breaks and inner indentation below are kept verbatim.
citation: |
  @misc{contextcorruption2026,
    title={ContextCorruption-Env: Training Epistemic Robustness in LLMs},
    year={2026},
    note={OpenEnv Hackathon Submission}
  }