File size: 3,834 Bytes
43f41de
9a3b69b
8fa7af1
9a3b69b
 
 
 
8fa7af1
 
 
 
 
9a3b69b
43f41de
 
 
 
 
 
 
 
 
 
9a3b69b
43f41de
9a3b69b
43f41de
 
 
eb1ebe6
 
 
43f41de
 
 
 
eb1ebe6
 
43f41de
 
 
 
 
eb1ebe6
 
43f41de
eb1ebe6
 
43f41de
eb1ebe6
 
 
43f41de
9a3b69b
 
 
43f41de
 
 
 
 
9a3b69b
 
 
 
eb1ebe6
9a3b69b
eb1ebe6
9a3b69b
 
 
 
 
eb1ebe6
9a3b69b
eb1ebe6
 
b12f1bd
 
 
eb1ebe6
 
43f41de
eb1ebe6
 
 
 
 
 
8fa7af1
 
 
 
eb1ebe6
 
 
 
 
8fa7af1
9a3b69b
43f41de
8fa7af1
43f41de
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
"""Data models for the Research -> Interactive Explainer environment."""

from typing import Any, Literal

from openenv.core.env_server.types import Action, Observation
from pydantic import Field

try:
    from .constants import MAX_EXPLORE_STEPS, MAX_REPAIR_STEPS
except ImportError:  # pragma: no cover - supports direct test execution
    from constants import MAX_EXPLORE_STEPS, MAX_REPAIR_STEPS


# Closed set of tool identifiers accepted by ``ExplainerAction.tool``.
# Member order is kept as-is because it is visible in any schema/enum
# listing derived from this alias.
ResearchTool = Literal[
    "search_wikipedia", "search_hf_papers", "search_arxiv",
    "search_scholar", "fetch_docs", "search_hf_hub",
]


# Step request model. Only ``action_type`` has no default; every other field
# falls back to ``None`` or an empty string, so the "required for ..." wording
# in the descriptions below is NOT enforced by this class itself.
# NOTE(review): the class docstring and the description strings are left
# untouched on purpose -- pydantic models typically expose them through the
# generated JSON schema; confirm before rewording.
class ExplainerAction(Action):
    """Action: agent explores, generates, or repairs an artifact."""

    # Selects which of the field groups below is relevant for this step.
    action_type: Literal["explore", "generate", "repair"] = Field(
        description="'explore' to research, 'generate' to produce code, 'repair' to fix code",
        default=...,  # Ellipsis default: the field must be supplied
    )

    # ---- fields used by 'explore' actions ----

    # One of the ResearchTool identifiers, or None when not exploring.
    tool: ResearchTool | None = Field(
        description="Research tool to call when action_type='explore'",
        default=None,
    )
    query: str = Field(
        description="Research query used when action_type='explore'",
        default="",
    )
    intent: str = Field(
        description="Brief goal for the research call, e.g. equations or visual intuition",
        default="",
    )

    # ---- fields used by 'generate' and 'repair' actions ----

    # Field name shadows the ``format`` builtin inside the class body only;
    # it is part of the public schema and therefore kept.
    format: Literal["marimo", "manim"] | None = Field(
        description="Output format (required for generate/repair)",
        default=None,
    )
    code: str = Field(
        description="Complete Python source code (required for generate/repair)",
        default="",
    )
    narration: str = Field(
        description="Narration script (used when format='manim')",
        default="",
    )
    repair_notes: str = Field(
        description="Short explanation of what changed when action_type='repair'",
        default="",
    )


# Step response model. Every field has a default, so an instance can be built
# with no arguments. The two step counters default to the limits from the
# constants module (MAX_EXPLORE_STEPS / MAX_REPAIR_STEPS).
# NOTE(review): the class docstring and the description strings are left
# untouched on purpose -- pydantic models typically expose them through the
# generated JSON schema; confirm before rewording.
class ExplainerObservation(Observation):
    """Observation returned to the agent after each step."""

    # ---- task description (set on reset, echoed back each step) ----
    topic: str = Field(
        description="Title of the topic or paper",
        default="",
    )
    content: str = Field(
        description="Abstract or concept description",
        default="",
    )
    tier: Literal["beginner", "intermediate", "advanced"] = Field(
        description="Explanation depth tier",
        default="beginner",
    )
    keywords: str = Field(
        description="Comma-separated key terms",
        default="",
    )
    data_available: bool = Field(
        description="Whether the topic references datasets",
        default=False,
    )
    difficulty: Literal["easy", "medium", "hard"] = Field(
        description="Task difficulty tier",
        default="easy",
    )

    # ---- feedback for the most recent step ----

    # Includes a terminal "done" value in addition to the three action types.
    phase: Literal["explore", "generate", "repair", "done"] = Field(
        description="Current episode phase",
        default="explore",
    )
    feedback: str = Field(
        description="Feedback on the last action",
        default="",
    )
    search_results: str = Field(
        description="Papers/snippets returned from an explore step",
        default="",
    )
    # default_factory gives each instance its own fresh list.
    top_chunks: list[dict[str, Any]] = Field(
        description="Ranked top chunks returned from the last explore step",
        default_factory=list,
    )
    explored_context: str = Field(
        description="Accumulated research context from all explore steps so far",
        default="",
    )

    # ---- remaining budgets ----
    explore_steps_left: int = Field(
        description="Remaining explore steps before forced generate",
        default=MAX_EXPLORE_STEPS,
    )
    repair_attempts_left: int = Field(
        description="Remaining repair attempts after failed generation",
        default=MAX_REPAIR_STEPS,
    )

    # ---- repair / tooling context ----
    last_errors: str = Field(
        description="Latest lint/build errors available for repair",
        default="",
    )
    # Plain strings here, not the ResearchTool literal type.
    available_tools: list[str] = Field(
        description="Research tools available during explore",
        default_factory=list,
    )