Spaces:
Sleeping
Sleeping
File size: 4,966 Bytes
dfbb493 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 | """
models.py — SpectraQual Typed Pydantic Models
OpenEnv spec requires: typed Observation, Action, Reward models.
"""
from __future__ import annotations
from typing import List, Literal, Optional, Dict, Any
from pydantic import BaseModel, Field
# ---------------------------
# PCB OBSERVATION
# ---------------------------
class PCBObservation(BaseModel):
    """Observation returned after each reset() or step().

    Bundles the board currently under inspection (identifier, defect type,
    component cost, criticality, anomaly signals), the state of the
    soldering slots, episode bookkeeping (step counter, task id, valid
    actions) and running performance metrics.

    Numeric bounds are enforced by the ``ge``/``le`` constraints declared
    on the individual fields; fields declared with ``...`` are required.
    """

    # --- Board under inspection ---
    board_id: str = Field(..., description="Unique board identifier, e.g. SQ-4321")
    defect_type: Literal[
        "none", "missing_component", "solder_bridge", "short_circuit"
    ] = Field(..., description="Type of defect detected on the PCB")
    # The description previously carried mis-decoded bytes in place of the
    # rupee sign; the symbol is restored here.
    component_cost: float = Field(
        ..., ge=10.0, le=200.0, description="Replacement cost of damaged component in ₹"
    )
    # The description previously carried a mis-decoded dash; restored.
    criticality: float = Field(
        ..., ge=0.1, le=1.0, description="Risk score — higher means more critical circuit"
    )

    # --- Soldering slots ---
    slots_free: int = Field(
        ..., ge=0, description="Number of soldering slots currently available"
    )
    slots_state: List[int] = Field(
        ..., description="Remaining time units for each soldering slot (0=free)"
    )

    # --- Anomaly signals (optional; default to "not anomalous") ---
    is_anomaly: bool = Field(
        False, description="True if this board exhibits rare/unusual characteristics"
    )
    anomaly_score: float = Field(
        0.0, ge=0.0, le=1.0, description="Anomaly confidence (0=normal, 1=highly anomalous)"
    )

    # --- Episode bookkeeping ---
    step: int = Field(..., ge=0, description="Current step number in the episode")
    task_id: str = Field(..., description="ID of the active task")
    valid_actions: List[str] = Field(
        ..., description="List of valid actions for this observation"
    )

    # --- Real-time metrics (all default to 0.0) ---
    rolling_accuracy: float = Field(
        0.0, ge=0.0, le=1.0, description="Fraction of correct decisions so far"
    )
    throughput: float = Field(
        0.0, ge=0.0, description="Boards processed per step so far"
    )
    cumulative_reward: float = Field(
        0.0, description="Cumulative normalized reward so far in this episode"
    )
# ---------------------------
# PCB ACTION
# ---------------------------
class PCBAction(BaseModel):
    """Action submitted by an agent to the environment.

    Carries a single required field, ``action``, restricted to one of six
    literal decision names.
    """

    # Required: no default is supplied, so the caller must pick a decision.
    action: Literal[
        "PASS", "SCRAP",
        "ROUTE_COMPONENT_REPLACEMENT", "ROUTE_SOLDERING", "ROUTE_DIAGNOSTICS",
        "WAIT",
    ] = Field(default=..., description="Decision made for the current PCB")
# ---------------------------
# REWARD COMPONENTS
# ---------------------------
class RewardComponents(BaseModel):
    """Decomposed reward signal for transparency and debugging.

    Holds the individual reward terms, their weighted raw sum, the final
    normalized value and a human-readable explanation.

    Only ``normalized`` is range-validated (``ge=0.0``, ``le=1.0``). The
    ranges quoted in the other descriptions are informational and are not
    enforced by this model.

    The range separators in the descriptions previously carried a
    mis-decoded dash; they are restored to an en dash here.
    """

    # --- Individual reward terms ---
    defect_reward: float = Field(
        ..., description="Score for handling the defect correctly (0.0–1.0)"
    )
    cost_efficiency: float = Field(
        ..., description="Economic value retained vs. lost (0.0–1.0)"
    )
    queue_penalty: float = Field(
        ..., description="Penalty for creating factory bottlenecks (0.0–1.0, lower is worse)"
    )
    criticality_factor: float = Field(
        ..., description="Risk-adjusted modifier based on criticality (0.0–1.0)"
    )
    # Optional term: defaults to 0.0 when not supplied.
    anomaly_bonus: float = Field(
        0.0, description="Bonus for correctly flagging/handling anomalous board (0.0–1.0)"
    )

    # --- Aggregates ---
    total_raw: float = Field(
        ..., description="Weighted sum of all components before normalization"
    )
    normalized: float = Field(
        ..., ge=0.0, le=1.0, description="Final normalized reward in [0.0, 1.0]"
    )
    explanation: str = Field(
        ..., description="Human-readable explanation of why this reward was given"
    )
# ---------------------------
# STEP RESULT
# ---------------------------
class StepResult(BaseModel):
    """Full result returned by step() and reset().

    Wraps the new observation together with the step's normalized reward,
    an optional per-component reward breakdown, the episode-termination
    flag and a free-form diagnostics mapping.
    """

    # Required: the observation carries no default.
    observation: PCBObservation

    reward: float = Field(
        default=0.0,
        ge=0.0,
        le=1.0,
        description="Normalized reward for this step [0.0, 1.0]",
    )

    # Defaults to None when no breakdown is supplied.
    reward_components: Optional[RewardComponents] = Field(
        default=None,
        description="Detailed breakdown of reward components",
    )

    done: bool = Field(default=..., description="True if the episode has ended")

    # default_factory gives every instance its own fresh dict.
    info: Dict[str, Any] = Field(
        default_factory=dict,
        description="Additional diagnostic info",
    )
# ---------------------------
# TASK RESULT (for graders)
# ---------------------------
class TaskResult(BaseModel):
    """Summary of a completed task run, consumed by graders.

    Every field is required except ``final_score``, which defaults to
    ``0.0`` and is filled in by the grader.
    """

    task_id: str
    total_steps: int

    # Per-step normalized rewards.
    rewards: List[float]

    correct_decisions: int
    total_decisions: int

    # Number of times the queue was maxed out.
    bottleneck_count: int

    # How many anomaly boards appeared.
    anomaly_total: int

    # How many of those the agent correctly flagged.
    anomaly_flagged: int

    cumulative_raw_reward: float
    max_possible_raw: float

    # Filled by the grader; 0.0 until then.
    final_score: float = 0.0
|