File size: 4,966 Bytes
dfbb493
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
"""
models.py — SpectraQual Typed Pydantic Models
OpenEnv spec requires: typed Observation, Action, Reward models.
"""

from __future__ import annotations
from typing import List, Literal, Optional, Dict, Any
from pydantic import BaseModel, Field


# ---------------------------
# PCB OBSERVATION
# ---------------------------
class PCBObservation(BaseModel):
    """Observation returned after each reset() or step().

    Bundles the attributes of the board under inspection, the state of the
    soldering slots, episode bookkeeping (``step``, ``task_id``,
    ``valid_actions``) and running metrics. Numeric bounds are declared via
    the ``ge``/``le`` constraints on the individual fields.
    """

    # --- Board under inspection ---
    board_id: str = Field(..., description="Unique board identifier, e.g. SQ-4321")
    defect_type: Literal[
        "none", "missing_component", "solder_bridge", "short_circuit"
    ] = Field(..., description="Type of defect detected on the PCB")
    component_cost: float = Field(
        ..., ge=10.0, le=200.0, description="Replacement cost of damaged component in ₹"
    )
    criticality: float = Field(
        ..., ge=0.1, le=1.0, description="Risk score — higher means more critical circuit"
    )

    # --- Soldering station state ---
    # NOTE(review): nothing here enforces that slots_free matches the number
    # of zero entries in slots_state; the producer is expected to keep them
    # consistent — confirm against the environment code.
    slots_free: int = Field(
        ..., ge=0, description="Number of soldering slots currently available"
    )
    slots_state: List[int] = Field(
        ..., description="Remaining time units for each soldering slot (0=free)"
    )

    # --- Anomaly signal (optional; defaults describe a normal board) ---
    is_anomaly: bool = Field(
        False, description="True if this board exhibits rare/unusual characteristics"
    )
    anomaly_score: float = Field(
        0.0, ge=0.0, le=1.0, description="Anomaly confidence (0=normal, 1=highly anomalous)"
    )

    # --- Episode bookkeeping ---
    step: int = Field(..., ge=0, description="Current step number in the episode")
    task_id: str = Field(..., description="ID of the active task")
    valid_actions: List[str] = Field(
        ..., description="List of valid actions for this observation"
    )

    # --- Real-time metrics ---
    rolling_accuracy: float = Field(
        0.0, ge=0.0, le=1.0, description="Fraction of correct decisions so far"
    )
    throughput: float = Field(
        0.0, ge=0.0, description="Boards processed per step so far"
    )
    # Unbounded on purpose: no ge/le is declared for the running total.
    cumulative_reward: float = Field(
        0.0, description="Cumulative normalized reward so far in this episode"
    )


# ---------------------------
# PCB ACTION
# ---------------------------
class PCBAction(BaseModel):
    """A single decision an agent sends to the environment."""

    # Required field restricted to a closed set of decision names.
    action: Literal[
        "PASS", "SCRAP", "ROUTE_COMPONENT_REPLACEMENT",
        "ROUTE_SOLDERING", "ROUTE_DIAGNOSTICS", "WAIT",
    ] = Field(
        default=...,
        description="Decision made for the current PCB",
    )


# ---------------------------
# REWARD COMPONENTS
# ---------------------------
class RewardComponents(BaseModel):
    """Decomposed reward signal for transparency and debugging.

    Every component is required except ``anomaly_bonus``, which defaults to
    0.0. The "(0.0–1.0)" ranges in the component descriptions are
    informational only; ``normalized`` is the single field with declared
    ``ge``/``le`` bounds.
    """

    defect_reward: float = Field(
        ..., description="Score for handling the defect correctly (0.0–1.0)"
    )
    cost_efficiency: float = Field(
        ..., description="Economic value retained vs. lost (0.0–1.0)"
    )
    queue_penalty: float = Field(
        ..., description="Penalty for creating factory bottlenecks (0.0–1.0, lower is worse)"
    )
    criticality_factor: float = Field(
        ..., description="Risk-adjusted modifier based on criticality (0.0–1.0)"
    )
    anomaly_bonus: float = Field(
        0.0, description="Bonus for correctly flagging/handling anomalous board (0.0–1.0)"
    )
    total_raw: float = Field(
        ..., description="Weighted sum of all components before normalization"
    )
    normalized: float = Field(
        ..., ge=0.0, le=1.0, description="Final normalized reward in [0.0, 1.0]"
    )
    explanation: str = Field(
        ..., description="Human-readable explanation of why this reward was given"
    )


# ---------------------------
# STEP RESULT
# ---------------------------
class StepResult(BaseModel):
    """Envelope handed back by step() and reset()."""

    # The observation is the only field without a Field() declaration.
    observation: PCBObservation

    reward: float = Field(
        default=0.0,
        ge=0.0,
        le=1.0,
        description="Normalized reward for this step [0.0, 1.0]",
    )

    # Optional breakdown; stays None when no component detail is supplied.
    reward_components: Optional[RewardComponents] = Field(
        default=None,
        description="Detailed breakdown of reward components",
    )

    done: bool = Field(
        default=...,
        description="True if the episode has ended",
    )

    # default_factory gives every instance its own fresh dict.
    info: Dict[str, Any] = Field(
        default_factory=dict,
        description="Additional diagnostic info",
    )


# ---------------------------
# TASK RESULT (for graders)
# ---------------------------
class TaskResult(BaseModel):
    """Summary of a completed task run, consumed by graders.

    All fields are required except ``final_score``, which defaults to 0.0.
    No range constraints are declared on any field in this model.
    """

    # Identifier of the task this summary belongs to.
    task_id: str
    # Number of steps taken in the run.
    total_steps: int
    # Per-step normalized rewards.
    rewards: List[float]
    # Decision counters; presumably correct_decisions <= total_decisions,
    # but that is not validated here — confirm with the producer.
    correct_decisions: int
    total_decisions: int
    # Number of times the queue was maxed out.
    bottleneck_count: int
    # How many anomaly boards appeared during the run.
    anomaly_total: int
    # How many of those the agent correctly flagged.
    anomaly_flagged: int
    # Raw reward totals: accumulated value and the maximum attainable value.
    # NOTE(review): looks like inputs for normalization by the grader — confirm.
    cumulative_raw_reward: float
    max_possible_raw: float
    # Left at 0.0 on construction; filled in by the grader.
    final_score: float = 0.0