File size: 2,034 Bytes
acf77ab
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
from __future__ import annotations

from dataclasses import dataclass
from enum import StrEnum

from openenv.core.env_server.types import Action, Observation
from pydantic import Field


class CodeForgeActionType(StrEnum):
    QUERY_KB = "query_kb"
    QUERY_CLUSTER = "query_cluster"
    INTERROGATE = "interrogate"
    RUN_RALPH = "run_ralph"
    SUBMIT = "submit"
    GET_AUDIT = "get_audit"


class CodeForgeAction(Action):
    """Single flat action payload, discriminated by `action_type`.

    One model carries the parameters for several action types; the group
    comments below record which action each set of fields is labelled for
    (no group is labelled for ``interrogate``). Only `action_type` is
    required. Every other field has a default, so this schema on its own
    does not enforce that the fields matching the chosen `action_type` are
    actually supplied.
    """

    action_type: CodeForgeActionType
    # query_kb fields
    claim: str | None = None
    # NOTE(review): unlike `max_iters`, no bounds are declared for `top_k`,
    # so zero or negative values are not rejected by this schema -- confirm
    # the consumer handles them.
    top_k: int = 5
    required_tags: tuple[str, ...] = ()
    # submit fields
    files: dict[str, str] | None = None
    # Optional; when provided it must lie in the closed interval [0.0, 1.0].
    confidence: float | None = Field(default=None, ge=0.0, le=1.0)
    # query_cluster fields
    cluster_label: str | None = None
    # run_ralph fields
    # Defaults to 3 and is constrained to the inclusive range 1..10.
    max_iters: int = Field(default=3, ge=1, le=10)
    # get_audit fields
    target_run_id: str | None = None


class CodeForgeObservation(Observation):
    """Observation payload: episode/task state plus per-action result slots.

    The fields from `episode_id` through `is_done` declare no defaults and
    must always be supplied. The `last_*` result fields, the audit summary
    and `error` all default to empty tuples, an empty dict, or ``None``.
    """

    # NOTE(review): the `Observation` base class is defined outside this
    # file; confirm `is_done` / `last_reward` do not duplicate fields it
    # already provides.
    episode_id: str
    task_id: str
    task_level: str
    task_brief: str
    initial_files: dict[str, str]
    current_files: dict[str, str]
    budget_remaining: int
    previous_score: float
    last_reward: float
    is_done: bool
    # KB results
    last_citations: tuple[dict[str, object], ...] = ()
    last_grounding: dict[str, object] | None = None
    # Cluster results
    last_cluster_hits: tuple[str, ...] = ()
    # Interrogation results
    last_interrogation_questions: tuple[str, ...] = ()
    # Ralph results
    last_ralph_run_id: str | None = None
    last_ralph_iterations: tuple[dict[str, object], ...] = ()
    # Audit summary
    # default_factory gives each instance its own fresh dict rather than a
    # single shared default object.
    cumulative_audit_summary: dict[str, object] = Field(default_factory=dict)
    # Error field
    error: str | None = None


@dataclass(frozen=True)
class AuditEntry:
    """Frozen record holding the audit fields for one step.

    Instances are immutable: assigning to any attribute after construction
    raises ``dataclasses.FrozenInstanceError``. Field order below is also
    the positional order of the generated ``__init__``.

    Note: because the class is frozen with the default ``eq=True``, a
    field-based ``__hash__`` is generated. Hashing an entry therefore
    raises ``TypeError`` whenever `grounding_report` is a dict; entries
    with ``grounding_report=None`` and hashable field values hash normally.
    """

    step_index: int
    # Typed as a plain string here rather than as CodeForgeActionType.
    action_type: str
    cited_skill_ids: tuple[str, ...]
    cited_clusters: tuple[str, ...]
    # The dict itself stays mutable even though the entry is frozen.
    grounding_report: dict[str, object] | None
    reward: float
    # The two fields below may each be None.
    brier_penalty: float | None
    confidence_declared: float | None
    quality: float