# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

"""
Data models for the DebugZero Environment.

The DebugZero environment implements the Absolute Zero paradigm for debugging self-play.
"""

from openenv.core.env_server.types import Action, Observation, State
from pydantic import Field
from typing import Optional


class DebugzeroAction(Action):
    """Action for the DebugZero environment representing the Proposer or Solver inputs."""

    # Both fields pass `...` as the default, i.e. no fallback value is
    # supplied and the caller has to provide them explicitly.

    # Name of the role submitting this action. It is typed as a plain `str`,
    # so the two values named in the description are not enforced here.
    role: str = Field(
        ...,
        description="Role taking action: 'proposer' or 'solver'",
    )

    # Source text attached to the action; what it means depends on `role`
    # (see the description string).
    code: str = Field(
        ...,
        description="Code injected (by proposer) or fixed (by solver)",
    )


class DebugzeroObservation(Observation):
    """Observation from the DebugZero environment following sandbox execution."""

    # Every field carries a default, so an observation can be built without
    # passing any of them.

    # Role expected to act on the following step; starts out as "proposer".
    role_next: str = Field(
        default="proposer",
        description="The role supposed to play next",
    )

    # Code as it stands when the observation is produced.
    current_code: str = Field(
        default="",
        description="The current state of the python code",
    )

    # Textual result reported for the sandboxed test evaluation.
    execution_result: str = Field(
        default="",
        description="Result of evaluating tests in the sandbox",
    )

    # Outcome flags; both default to False.
    tests_passed: bool = Field(
        default=False,
        description="Whether the tests passed",
    )
    syntax_error: bool = Field(
        default=False,
        description="Whether the code had a parse/syntax error",
    )

    # The [0.0, 1.0] range is stated in the description only; no bound
    # constraints are declared on the field itself.
    score: float = Field(
        default=0.0,
        description="Episode progress score in the range [0.0, 1.0]",
    )

class DebugzeroState(State):
    """State for the DebugZero environment, extending default state with seed context."""

    # All fields default to an empty string except `role_turn`, which
    # defaults to "proposer".

    # Identifier of the seed function this state refers to.
    seed_id: str = Field(
        default="",
        description="ID of the HumanEval function",
    )

    # Unmodified seed code, kept next to the working copy below.
    original_code: str = Field(
        default="",
        description="Original clean seed code",
    )

    # Working copy of the code after the most recent turn.
    current_code: str = Field(
        default="",
        description="Current code after Proposer/Solver turn",
    )

    # Role whose turn it currently is.
    role_turn: str = Field(
        default="proposer",
        description="Current turn's role",
    )