File size: 1,911 Bytes
57fc36e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
from __future__ import annotations

from enum import Enum
from typing import Any, Literal

from pydantic import BaseModel, Field, field_validator


class TaskType(str, Enum):
    """Closed set of task identifiers.

    Mixing in ``str`` makes every member a real string, so a member
    compares equal to (and can be looked up by) its raw value.
    """

    # Members are listed in increasing difficulty, as their names state.
    EASY = "easy_docker"
    MEDIUM = "medium_k8s"
    HARD = "hard_ml_config"


class ConfigAction(BaseModel):
    """A single requested change to a configuration.

    ``operation`` is restricted to ``"edit"``, ``"add"`` or ``"delete"``;
    ``path`` addresses the target using dot notation; ``value`` is optional
    and defaults to ``None``.
    """

    operation: Literal["edit", "add", "delete"] = Field(description="Operation type")
    path: str = Field(
        description="Dot path, list indexes allowed (example: a.b.0.c)",
    )
    value: Any | None = Field(
        default=None,
        description="Value used for edit/add",
    )

    @field_validator("path")
    @classmethod
    def _validate_path(cls, value: str) -> str:
        """Normalize ``path`` by trimming surrounding whitespace.

        Returns:
            The path with leading and trailing whitespace removed.

        Raises:
            ValueError: If nothing is left after trimming.
        """
        trimmed = value.strip()
        if trimmed:
            return trimmed
        # Empty or whitespace-only input cannot address anything.
        raise ValueError("path cannot be empty")


class ConfigObservation(BaseModel):
    """Observation of a task: its configuration text, validity and scores."""

    # Task this observation belongs to.
    task_id: TaskType
    # Text description of the task.
    task_description: str
    # Current configuration as a string; the serialization format is not
    # specified by this model.
    current_config: str
    # NOTE(review): presumably whether current_config parses — confirm with producer.
    syntax_valid: bool
    # Defaults to a fresh empty list for each instance.
    validation_errors: list[str] = Field(default_factory=list)
    # All three scores are constrained to the closed range [0.0, 1.0].
    schema_score: float = Field(ge=0.0, le=1.0)
    logic_score: float = Field(ge=0.0, le=1.0)
    overall_score: float = Field(ge=0.0, le=1.0)
    # Must be non-negative.
    step_count: int = Field(ge=0)
    # Must be at least 1.
    max_steps: int = Field(ge=1)


class ConfigReward(BaseModel):
    """Reward record: a bounded reward value plus the scores around it."""

    # Reward value, constrained to [0.0, 1.0].
    value: float = Field(ge=0.0, le=1.0)
    # Both scores are constrained to [0.0, 1.0].
    previous_score: float = Field(ge=0.0, le=1.0)
    current_score: float = Field(ge=0.0, le=1.0)
    # Unconstrained float, so it may be negative.
    # NOTE(review): presumably current_score - previous_score; not enforced here.
    delta: float
    # Defaults to a fresh empty list for each instance.
    penalties: list[str] = Field(default_factory=list)


class EnvState(BaseModel):
    """Environment state: task, progress counters and latest results."""

    # Optional; defaults to None.
    task_id: TaskType | None = None
    # Required flag; no default is provided.
    done: bool
    # Must be non-negative.
    step_count: int = Field(ge=0)
    # Must be at least 1.
    max_steps: int = Field(ge=1)
    # Optional; both default to None.
    observation: ConfigObservation | None = None
    last_reward: ConfigReward | None = None


class ResetRequest(BaseModel):
    """Reset request body; both fields are optional and default to None."""

    # NOTE(review): task_id and task share a type and look like two spellings
    # of the same selection; which one wins is decided by the consumer, not
    # by this model — confirm against the handler.
    task_id: TaskType | None = None
    task: TaskType | None = None


class StepResponse(BaseModel):
    """Step response: observation, reward, done flag and an info mapping."""

    # All four fields are required (no defaults).
    observation: ConfigObservation
    reward: ConfigReward
    done: bool
    # String-keyed mapping with arbitrary values; contents are not
    # constrained by this model.
    info: dict[str, Any]