name: indic_scripture_qa
version: "1.1.0"
description: >
  An RL environment for evaluating language agents on Indic scripture
  hallucination detection, correction, and semantic structure quality.
  Agents receive a question and a potentially flawed answer, which may
  contain factual hallucinations, structural incoherence, missing
  terminology, or poor logical ordering. They must retrieve source
  passages, edit, restructure, cite, and decide whether to accept or
  reject.
author: kishlay-notabot
license: MIT

env:
  module: main:app
  port: 7860
  health_endpoint: /health

action_space:
  type: object
  fields:
    action_type:
      type: string
      enum: [RETRIEVE, EDIT, RESTRUCTURE, CITE, ACCEPT, REJECT]
      description: >
        RETRIEVE fetches source passages. EDIT fixes factual content.
        RESTRUCTURE reorganises flow and coherence without changing facts.
        CITE adds a scripture reference. ACCEPT and REJECT are terminal.
    payload:
      type: string
      nullable: true
      description: >
        Content for the action: the query for RETRIEVE, the new answer
        text for EDIT or RESTRUCTURE, the citation string for CITE;
        unused for ACCEPT and REJECT.

observation_space:
  type: object
  fields:
    question:
      type: string
    current_answer:
      type: string
    retrieved_passages:
      type: array
      items: { type: string }
    current_citations:
      type: array
      items: { type: string }
    steps_remaining:
      type: integer
    task_name:
      type: string
    feedback:
      type: string
      nullable: true
    structural_hints:
      type: array
      items: { type: string }
      description: >
        Non-spoiler hints about the expected answer structure (e.g.
        required terminology, conceptual ordering, completeness
        expectations).

tasks:
  - name: verify-factual
    description: >
      Easy: verify whether a given answer is factually correct and
      structurally sound. Accept good answers, reject or fix bad ones.
    max_steps: 5
    num_scenarios: 5
  - name: correct-and-cite
    description: >
      Medium: improve a partially correct answer by fixing factual gaps,
      restructuring for coherence, adding proper terminology, and citing
      scripture sources.
    max_steps: 8
    num_scenarios: 5
  - name: fix-hallucination
    description: >
      Hard: detect subtle hallucinations, fix factual errors, correct
      misused Sanskrit terms, reorder logical flow, and ensure complete
      coverage of required conceptual aspects.
    max_steps: 12
    num_scenarios: 5

reward:
  range: [0.0, 1.0]
  components:
    factual:
      weight: 0.43
      description: >
        Token-F1 similarity to the ground-truth answer (×0.90) plus
        citation recall (×0.30).
    structural:
      weight: 0.33
      description: >
        Composite of four axes: terminology precision (0.30),
        completeness (0.25), logical ordering (0.25), and coherence
        (0.20). Includes banned-term penalties for common misconceptions.
    efficiency:
      weight: 0.10
      description: Bonus for using fewer steps.
    step_shaping:
      weight: 0.14
      description: >
        Per-step signals: +0.05 for useful retrieval, +0.20 to +0.50 for
        quality edits and restructures, +0.15 for correct citations, with
        penalties for redundancy and degradation.