File size: 3,391 Bytes
c47c81c
 
 
52fe477
c47c81c
 
 
 
 
52fe477
 
c47c81c
 
52fe477
c47c81c
 
 
 
 
 
 
 
 
 
 
52fe477
c47c81c
 
 
 
 
 
 
 
 
 
 
 
 
52fe477
c47c81c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
52fe477
c47c81c
 
 
 
 
 
 
 
 
52fe477
c47c81c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
52fe477
 
c47c81c
 
 
 
 
 
 
 
 
52fe477
 
c47c81c
52fe477
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
"""
CodeSensei — Typed Models for the CodeDebug OpenEnv Environment.

Defines the Action, Observation, and State Pydantic models that form the
typed contract between the training client and the environment server.
"""

from __future__ import annotations

from typing import List, Optional, Any
from pydantic import BaseModel, Field


class CodeDebugAction(BaseModel):
    """Payload the LLM agent submits to the environment on each step.

    Attributes:
        proposed_fix: Corrected Python function body proposed by the LLM.
            Required; there is no default.
        session_id: Unique identifier of the session for this training
            episode. Defaults to an empty string.
    """

    proposed_fix: str
    session_id: str = Field(default="")


class TestResult(BaseModel):
    """Outcome of executing a single test case.

    Attributes:
        test_name: Human-readable name / description of the test.
        passed: Whether the test passed.
        error_message: Error message if the test failed; empty string
            otherwise (the default).
    """

    # The class name starts with "Test", so pytest would attempt to collect
    # it (and emit a PytestCollectionWarning) whenever it is imported into a
    # test module. Setting ``__test__`` opts out of collection. Dunder names
    # are not turned into model fields by pydantic, so the schema is unchanged.
    __test__ = False

    test_name: str
    passed: bool
    error_message: str = ""


class CodeDebugObservation(BaseModel):
    """What the environment hands back to the agent after a step.

    Attributes:
        buggy_code: Source of the original buggy Python function.
        current_code: Code as it stands after the proposed fix was applied.
        error_output: Stderr/exception text from the most recent execution.
        test_results: Per-test details, one ``TestResult`` per test case.
        tests_passed: Count of tests that passed.
        tests_total: Count of tests overall.
        reward: Aggregated reward signal for this step.
        done: True once the episode is over (all tests pass or the attempt
            limit is reached).
        attempt: Current attempt number (1-indexed); defaults to 0.
        max_attempts: Upper bound on attempts per episode; defaults to 6.
        feedback: Human-readable feedback intended for the LLM's context
            window.
    """

    # Required on every observation (no defaults).
    buggy_code: str
    current_code: str
    error_output: str

    # Test outcome of the latest execution.
    test_results: List[TestResult] = Field(default_factory=list)
    tests_passed: int = Field(default=0)
    tests_total: int = Field(default=0)

    # Reward signal and episode progress.
    reward: float = Field(default=0.0)
    done: bool = Field(default=False)
    attempt: int = Field(default=0)
    max_attempts: int = Field(default=6)
    feedback: str = Field(default="")


class CodeDebugState(BaseModel):
    """Internal state of the environment for a single episode.

    Every field has a default, so the model can be constructed with no
    arguments and filled in afterwards.

    Attributes:
        episode_id: Unique identifier for this episode.
        session_id: WebSocket session identifier.
        attempt: Current attempt number (1-indexed); defaults to 0.
        max_attempts: Maximum allowed attempts; defaults to 6.
        original_bug: The original buggy code string.
        current_code: The latest version of the code.
        bug_description: Short description of the intentional bug.
        function_name: Name of the function being debugged.
        tests_passed_history: List of tests_passed counts per attempt.
        fix_hashes: List of SHA-256 hashes (as strings) of previously
            proposed fixes. Stored as a list, not a set, so uniqueness is
            not enforced by the type.
        solved: Whether the bug has been successfully fixed.
    """
    # Model configuration: permit field types pydantic has no built-in
    # validator for.
    # NOTE(review): every field declared below uses a built-in type, so this
    # setting presumably exists for attributes attached elsewhere — confirm.
    class Config:
        arbitrary_types_allowed = True

    episode_id: str = ""
    session_id: str = ""
    attempt: int = 0
    max_attempts: int = 6
    original_bug: str = ""
    current_code: str = ""
    bug_description: str = ""
    function_name: str = ""
    tests_passed_history: List[int] = Field(default_factory=list)
    fix_hashes: List[str] = Field(default_factory=list)
    solved: bool = False
    # The internal ``_bug_data`` attribute is deliberately not declared as a
    # Field, to avoid pydantic issues with raw dicts.
    # NOTE(review): ``_bug_data`` is not defined anywhere in this class;
    # presumably it is attached by the environment at runtime — confirm.