Spaces:
Sleeping
Sleeping
File size: 1,538 Bytes
import random
# Tabular Q-values, keyed by state. Starts empty; init_state() adds a row
# of {action name: value} the first time a state is seen.
Q: dict = dict()
# Every action name the agent knows about. init_state() creates one Q-table
# entry per name in this list, in this order.
ACTIONS = [
    "PASS", "SCRAP",
    "ROUTE_COMPONENT_REPLACEMENT", "ROUTE_SOLDERING", "ROUTE_DIAGNOSTICS",
    "WAIT",
]
def get_valid_actions(defect):
    """Return the action names permitted for a board with the given defect.

    A new list is built on every call, so callers may mutate the result.
    Any defect label that is not listed below falls back to ``["SCRAP"]``.
    """
    # Ordered (label, allowed actions) pairs; the order inside each tuple is
    # the order callers see, which matters for first-wins tie-breaking.
    routing = (
        ("none", ("PASS",)),
        ("missing_component", ("ROUTE_COMPONENT_REPLACEMENT", "SCRAP")),
        ("solder_bridge", ("ROUTE_SOLDERING", "WAIT", "SCRAP")),
        ("short_circuit", ("SCRAP", "ROUTE_DIAGNOSTICS")),
    )
    for label, allowed in routing:
        if defect == label:
            return list(allowed)
    return ["SCRAP"]
# Convert a PCB record plus factory status into a Q-table state key
def get_state(pcb, factory):
    """Build the state tuple for one board.

    Reads ``factory["soldering_slots"]`` and the ``"defect_type"``,
    ``"component_cost"`` and ``"criticality"`` entries of ``pcb``.

    Returns a 4-tuple:
        (defect type,
         component cost divided by 50 and rounded to an integer,
         criticality rounded to one decimal place,
         number of soldering slots equal to 0)
    """
    open_slots = factory["soldering_slots"].count(0)
    defect = pcb["defect_type"]
    cost_bucket = round(pcb["component_cost"] / 50)  # bucket cost
    criticality = round(pcb["criticality"], 1)
    return defect, cost_bucket, criticality, open_slots
# Make sure a state has a row in the Q-table
def init_state(state):
    """Create the Q-table row for ``state`` if it does not exist yet.

    A new row maps every name in ACTIONS to 0. An existing row is left
    untouched.
    """
    if state in Q:
        return
    Q[state] = dict.fromkeys(ACTIONS, 0)
# Epsilon-greedy action selection
def choose_action(state, epsilon=0.3):
    """Pick an action for ``state`` from the actions valid for its defect.

    ``state[0]`` is taken as the defect label. With probability ``epsilon``
    a valid action is drawn uniformly at random; otherwise the valid action
    with the highest Q-value is returned, with the earliest one in the valid
    list winning ties.

    Side effect: creates the Q-table row for ``state`` if it is missing.
    """
    init_state(state)
    candidates = get_valid_actions(state[0])

    # Explore: ignore the learned values entirely.
    explore = random.random() < epsilon
    if explore:
        return random.choice(candidates)

    # Exploit: best learned value, restricted to the valid actions.
    q_row = Q[state]
    return max(candidates, key=lambda name: q_row[name])
# Q-learning update
def update_q(state, action, reward, next_state, alpha=0.1, gamma=0.9):
    """Apply one tabular Q-learning step to ``Q[state][action]``.

    The stored value is moved a fraction ``alpha`` of the way towards
    ``reward + gamma * max(Q[next_state].values())``.

    Args:
        state: Q-table key of the state the action was taken in.
        action: Name of the action taken; must be a key of the state's row.
        reward: Reward observed for taking ``action`` in ``state``.
        next_state: Q-table key of the resulting state.
        alpha: Learning rate.
        gamma: Discount factor applied to the best next-state value.

    Side effect: creates Q-table rows for ``state`` and ``next_state`` if
    either is missing.

    Raises:
        KeyError: If ``action`` is not one of the names in the state's row.
    """
    # Both rows must exist. update_q() may be called for a state that never
    # went through choose_action(); without this, Q[state] raises KeyError.
    init_state(state)
    init_state(next_state)

    current = Q[state][action]
    # NOTE(review): this max covers every action in the row, including ones
    # get_valid_actions() would not offer for next_state. If those entries
    # are never updated they stay at their initial 0, which would act as a
    # floor on the bootstrap value -- confirm whether that is intended.
    best_next = max(Q[next_state].values())
    Q[state][action] = current + alpha * (reward + gamma * best_next - current)