| |
| |
| |
| |
| |
|
|
| """ |
| Data models for the AI Response Evaluation Environment. |
| |
| Three tasks: |
| 1. Correctness & Instruction Adherence (easy) |
| 2. Tone & Audience Appropriateness with structured user profiles (medium) |
| 3. Multi-dimensional Quality Scoring — correctness, tone, empathy, safety (hard) |
| """ |
|
|
| from typing import Literal, Optional |
| from openenv.core.env_server.types import Action, Observation |
| from pydantic import Field |
|
|
|
|
class CodeAssessmentAction(Action):
    """Action for submitting an evaluation judgment.

    The judgment travels as one string in ``answer``. The layout expected for
    that string differs per task; the three layouts are listed in the field's
    ``description``.
    """

    # Required: ``...`` as the first Field argument means there is no default.
    answer: str = Field(
        ...,
        description=(
            "Task 1: 'correct|incorrect|partially-correct, reason'\n"
            "Task 2: 'appropriate|needs-adjustment|inappropriate, issue1,issue2,...'\n"
            # Range separator repaired: it was a mis-encoded dash ("0β10").
            "Task 3: 'correctness=N, tone=N, empathy=N, safety=N' (N = 0-10)"
        ),
    )
|
|
|
|
class CodeAssessmentObservation(Observation):
    """Observation with scenario, user profile, and grading feedback.

    Every field declared here has a default, so an instance can be built
    without supplying any of them. The fields fall into three groups: the
    task/scenario being evaluated, an optional profile of the user in the
    scenario, and the grading result for the latest submission.
    """

    # --- Task and scenario -------------------------------------------------
    problem_description: str = Field(default="", description="Task instructions")
    difficulty: Literal["easy", "medium", "hard"] = Field(default="easy")
    test_case_input: str = Field(default="", description="Scenario to evaluate")
    task_type: str = Field(default="correctness_check")
    # NOTE(review): presumably a language code for the scenario text - confirm.
    language: str = Field(default="en")

    # --- Optional user profile (None when not provided) --------------------
    user_age: Optional[int] = Field(default=None, description="User's age")
    user_mood: Optional[str] = Field(
        default=None,
        description="User's emotional state: happy, sad, frustrated, anxious, neutral, angry",
    )
    user_context: Optional[str] = Field(
        default=None,
        description="Interaction context: education, customer-support, medical, professional, casual, crisis",
    )

    # --- Grading feedback --------------------------------------------------
    expected_output: Optional[str] = Field(default=None, description="Correct answer (shown after wrong submission)")
    feedback: str = Field(default="", description="Detailed grading explanation")
    is_correct: bool = Field(default=False)
    # Range separator repaired: it was a mis-encoded dash ("0.0β1.0").
    # The range is documented only; no ge/le constraint is enforced here.
    partial_credit: float = Field(default=0.0, description="0.0-1.0")
    # NOTE(review): presumably running counters kept by the environment - confirm.
    problems_solved: int = Field(default=0)
    current_streak: int = Field(default=0)
|
|