killer_sudoku_env / models.py
arnavster1's picture
Upload folder using huggingface_hub
95b25ca verified
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
"""Data models for the Killer Sudoku Environment.
Defines the action space (3 action types for reasoning about Sumdoku puzzles)
and the observation space (board state, candidates, feedback).
"""
from typing import Dict, List, Literal, Optional
from openenv.core.env_server.types import Action, Observation
from pydantic import Field
# Closed sets of string tags accepted by KillerSudokuAction. They are plain
# aliases for the Literal types, so the field annotations below are unchanged
# in meaning; they only keep the declarations short.
_ActionType = Literal["propose_candidates", "eliminate_candidate", "enter_answer"]
_Justification = Literal[
    "row_constraint",
    "column_constraint",
    "cage_sum",
    "propagation_of_prev_resolution",
    "logical_guess",
]


class KillerSudokuAction(Action):
    """Action for the Killer Sudoku environment.
    Supports three action types:
    - propose_candidates: Set candidate values for an empty cell
    - eliminate_candidate: Remove candidate values with justification
    - enter_answer: Place a final answer in a cell
    """

    # --- Required fields: what to do and on which cell ---
    action_type: _ActionType = Field(
        ...,
        description="Type of action to perform",
    )
    x: int = Field(
        ...,
        description="Column coordinate (0-indexed)",
    )
    y: int = Field(
        ...,
        description="Row coordinate (0-indexed)",
    )

    # --- Optional payload; every field below defaults to None ---
    # NOTE(review): this class declares no validators tying a payload field to
    # a particular action_type, so any pairing implied by the descriptions is
    # presumably checked by the environment -- confirm against the consumer.
    values: Optional[List[int]] = Field(
        default=None,
        description="Candidate values for propose_candidates or eliminate_candidate",
    )
    value: Optional[int] = Field(
        default=None,
        description="Answer value for enter_answer",
    )
    justification: Optional[_Justification] = Field(
        default=None,
        description="Reasoning justification for eliminate_candidate",
    )
class KillerSudokuObservation(Observation):
    """Observation from the Killer Sudoku environment."""

    # --- Text fields (all default to the empty string) ---
    board_display: str = Field(
        default="",
        description="ASCII art of the current board",
    )
    rules_prompt: str = Field(
        default="",
        description="Full rules text (populated on reset, empty on steps)",
    )
    action_result: str = Field(
        default="",
        description="Feedback from last action",
    )

    # --- Per-episode solving state ---
    # default_factory (rather than a literal {}) builds a fresh dict for each
    # observation instead of sharing one default object.
    candidates: Dict[str, List[int]] = Field(
        default_factory=dict,
        description="Current candidate values per cell (key: 'x,y')",
    )
    incorrect_answers: int = Field(
        default=0,
        description="Number of incorrect answers this episode",
    )

    # --- Puzzle configuration ---
    n: int = Field(
        default=9,
        description="Board size",
    )
    # The 1-100 range appears only in the description text; no ge/le bound is
    # declared on the field itself.
    difficulty: int = Field(
        default=15,
        description="Current difficulty level (1-100)",
    )