Spaces:
Sleeping
Sleeping
| # Copyright (c) Meta Platforms, Inc. and affiliates. | |
| # All rights reserved. | |
| # | |
| # This source code is licensed under the BSD-style license found in the | |
| # LICENSE file in the root directory of this source tree. | |
| """ | |
| Pydantic models for AEGIS-Env (Automated Evaluation & Grading Intelligent System). | |
| This environment is a multi-step state machine that mimics a 4-stage grading pipeline: | |
| arbiter -> scrutinizer -> validator -> mentor -> finished. | |
| """ | |
| from typing import Any, Dict, Optional | |
| from openenv.core.env_server.types import Action, Observation | |
| from pydantic import Field | |
class AegisObservation(Observation):
    # Observation payload returned by the AEGIS environment.
    #
    # Documented with `#` comments instead of a class docstring on purpose:
    # Pydantic copies a model's docstring into its generated JSON schema, so
    # adding one would change what the model emits.
    #
    # The four item fields are required; every later field carries a default.
    #
    # NOTE(review): `done`, `reward` and `metadata` may also be declared on the
    # imported `Observation` base; if so, the declarations here override them.
    # Confirm against openenv before removing any of them.

    # ---- Item under evaluation (required) --------------------------------
    question: str = Field(
        ...,
        description="Assessment question or prompt.",
    )
    rubric: str = Field(
        ...,
        description="Grading rubric text.",
    )
    max_score: float = Field(
        ...,
        description="Maximum achievable score for this item.",
        gt=0,  # strictly positive; zero and negative values fail validation
    )
    student_answer: str = Field(
        ...,
        description="Student response to evaluate.",
    )

    # ---- Pipeline progress -----------------------------------------------
    # Backward-compatible defaults: older servers/clients may omit these fields.
    #
    # NOTE(review): `current_stage` is a plain `str`; the stage names listed in
    # its description are not enforced by this model.
    current_stage: str = Field(
        description="Current pipeline stage: arbiter, scrutinizer, validator, mentor, or finished.",
        default="arbiter",
    )
    refinement_loops_taken: int = Field(
        description="Number of validator-requested refinement loops taken so far.",
        default=0,
        ge=0,  # a loop counter can never be negative
    )
    pipeline_history: str = Field(
        description="Accumulated pipeline transcript across stages (includes stage outputs and reward history).",
        default="",
    )

    # ---- Episode status --------------------------------------------------
    done: bool = Field(
        description="Whether the episode is complete.",
        default=False,
    )
    # `None` is the default, i.e. no reward value has been attached.
    reward: Optional[float] = Field(
        description="Reward for this transition (typically 0.0 for intermediate stages; final reward on completion).",
        default=None,
    )

    # ---- Diagnostics -----------------------------------------------------
    # `default_factory=dict` gives every instance its own fresh dict.
    grading_info: Dict[str, Any] = Field(
        description="Deterministic grading diagnostics from the last step (empty at reset).",
        default_factory=dict,
    )
    metadata: Dict[str, Any] = Field(
        description="Additional metadata from the environment (optional).",
        default_factory=dict,
    )
class AegisAction(Action):
    # Action payload submitted to the AEGIS environment.
    #
    # Documented with `#` comments instead of a class docstring on purpose:
    # Pydantic copies a model's docstring into its generated JSON schema, so
    # adding one would change what the model emits.
    #
    # `proposed_score` and `agent_reasoning` are required; `routing_decision`
    # falls back to "proceed" when omitted.

    # NOTE(review): the [0, max_score] range is stated only in the description;
    # no bound is declared on this field. Presumably range checking happens in
    # the environment -- confirm.
    proposed_score: float = Field(
        ...,
        description="Proposed score for the student answer (in [0, max_score]).",
    )

    agent_reasoning: str = Field(
        ...,
        description="Stage-specific reasoning, critique, or feedback text.",
    )

    # NOTE(review): typed as a plain `str`, so values other than 'proceed' or
    # 'revise' pass validation here. Verify the consumer handles unexpected
    # values.
    routing_decision: str = Field(
        description="Must be 'proceed' or 'revise'. Only matters during the 'validator' stage.",
        default="proceed",
    )