# aegis-env / models.py
# NishithP2004's picture
# Upload folder using huggingface_hub
# fa01cfa verified
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
"""
Pydantic models for AEGIS-Env (Automated Evaluation & Grading Intelligent System).

This environment is a multi-step state machine that mimics a 4-stage grading
pipeline (arbiter -> scrutinizer -> validator -> mentor) and then ends in the
`finished` state.
"""
from typing import Any, Dict, Optional
from openenv.core.env_server.types import Action, Observation
from pydantic import Field
class AegisObservation(Observation):
    """Observation model for the AEGIS-Env grading pipeline.

    Bundles the grading task (question, rubric, maximum score and student
    answer) with the pipeline progress fields and the per-transition
    reward and diagnostics.
    """

    # ---- Grading task: all four fields are required ----
    question: str = Field(..., description="Assessment question or prompt.")
    rubric: str = Field(..., description="Grading rubric text.")
    max_score: float = Field(
        ...,
        description="Maximum achievable score for this item.",
        gt=0,  # a zero or negative maximum is rejected at validation time
    )
    student_answer: str = Field(..., description="Student response to evaluate.")

    # ---- Pipeline progress ----
    # Backward-compatible defaults: older servers/clients may omit these fields.
    # NOTE(review): `current_stage` is a plain `str`; the stage names listed in
    # its description are not enforced by this declaration.
    current_stage: str = Field(
        default="arbiter",
        description=(
            "Current pipeline stage: arbiter, scrutinizer, validator, "
            "mentor, or finished."
        ),
    )
    refinement_loops_taken: int = Field(
        default=0,
        description="Number of validator-requested refinement loops taken so far.",
        ge=0,  # a loop counter can never be negative
    )
    pipeline_history: str = Field(
        default="",
        description=(
            "Accumulated pipeline transcript across stages "
            "(includes stage outputs and reward history)."
        ),
    )

    # ---- Episode status and reward ----
    done: bool = Field(default=False, description="Whether the episode is complete.")
    reward: Optional[float] = Field(
        default=None,
        description=(
            "Reward for this transition (typically 0.0 for intermediate "
            "stages; final reward on completion)."
        ),
    )

    # ---- Diagnostics ----
    # `default_factory=dict` gives every instance its own fresh dict instead
    # of sharing one mutable default.
    grading_info: Dict[str, Any] = Field(
        default_factory=dict,
        description="Deterministic grading diagnostics from the last step (empty at reset).",
    )
    metadata: Dict[str, Any] = Field(
        default_factory=dict,
        description="Additional metadata from the environment (optional).",
    )
class AegisAction(Action):
    """Action model submitted to AEGIS-Env for a pipeline stage.

    Carries a proposed score, free-form reasoning text, and a routing
    decision whose description says it only matters in the validator stage.

    NOTE(review): the constraints stated in the field descriptions (score
    within ``[0, max_score]``; routing limited to ``'proceed'``/``'revise'``)
    are not enforced by the declarations below. Presumably the environment
    checks them when it processes the action -- confirm against the
    environment implementation.
    """

    # Required: the score the agent proposes for the student answer.
    proposed_score: float = Field(
        ...,
        description="Proposed score for the student answer (in [0, max_score]).",
    )

    # Required: text explaining the agent's judgement at the current stage.
    agent_reasoning: str = Field(
        ...,
        description="Stage-specific reasoning, critique, or feedback text.",
    )

    # Optional: falls back to "proceed" when the caller does not supply it.
    routing_decision: str = Field(
        default="proceed",
        description=(
            "Must be 'proceed' or 'revise'. "
            "Only matters during the 'validator' stage."
        ),
    )