# spectraqual/src/agent.py
# Deploy SpectraQual OpenEnv environment (author: taarunforge, commit dfbb493)
import random
# Q-table: maps a state tuple -> {action: estimated value}.
# Rows are created lazily by init_state(); starts empty.
Q = {}
# Full action vocabulary. Per-defect availability is narrowed by
# get_valid_actions(); Q rows are still keyed over all actions.
ACTIONS = [
    "PASS",
    "SCRAP",
    "ROUTE_COMPONENT_REPLACEMENT",
    "ROUTE_SOLDERING",
    "ROUTE_DIAGNOSTICS",
    "WAIT"
]
def get_valid_actions(defect):
    """Return the list of actions allowed for a given defect type.

    Unknown defect types fall back to ["SCRAP"].
    """
    per_defect = {
        "none": ["PASS"],
        "missing_component": ["ROUTE_COMPONENT_REPLACEMENT", "SCRAP"],
        "solder_bridge": ["ROUTE_SOLDERING", "WAIT", "SCRAP"],
        "short_circuit": ["SCRAP", "ROUTE_DIAGNOSTICS"],
    }
    # Dispatch table instead of an if-chain; .get supplies the fallback.
    return per_defect.get(defect, ["SCRAP"])
# Convert PCB → STATE
def get_state(pcb, factory):
    """Discretize a PCB + factory snapshot into a hashable Q-table state.

    Returns (defect_type, cost_bucket, criticality_rounded, free_slots).
    """
    defect = pcb["defect_type"]
    # Bucket the raw cost into ~50-unit bins to keep the state space small.
    cost_bucket = round(pcb["component_cost"] / 50)
    criticality = round(pcb["criticality"], 1)
    free_slots = factory["soldering_slots"].count(0)
    return (defect, cost_bucket, criticality, free_slots)
# Initialize state
def init_state(state):
    """Ensure *state* has a Q-table row; all action values start at 0."""
    if state in Q:
        return
    Q[state] = dict.fromkeys(ACTIONS, 0)
# Epsilon-Greedy policy
def choose_action(state, epsilon=0.3):
    """Pick an action for *state*: random valid action with prob. epsilon,
    otherwise the valid action with the highest Q-value."""
    init_state(state)
    # state[0] is the defect type (see get_state); it gates which
    # actions are legal here.
    candidates = get_valid_actions(state[0])
    if random.random() < epsilon:
        # Exploration branch
        return random.choice(candidates)
    # Exploitation: best Q-value among the legal actions only.
    q_row = Q[state]
    return max(candidates, key=q_row.__getitem__)
# Q-learning update
def update_q(state, action, reward, next_state, alpha=0.1, gamma=0.9):
    """Apply one tabular Q-learning step:
    Q[s][a] += alpha * (reward + gamma * max_a' Q[s'][a'] - Q[s][a]).

    alpha is the learning rate, gamma the discount factor.
    """
    # Fix: initialize *both* rows. The original only initialized
    # next_state, so updating a state never visited via choose_action
    # (e.g. replayed transitions) raised KeyError on Q[state][action].
    init_state(state)
    init_state(next_state)
    old = Q[state][action]
    future = max(Q[next_state].values())
    Q[state][action] = old + alpha * (reward + gamma * future - old)