| |
| """Train, evaluate, and visualize REINFORCE vs Naive agent on LabEnv. |
| |
| Produces a 2x2 figure: |
| Top-left: Training reward curve (smoothed) |
| Top-right: Training success-rate curve (smoothed) |
| Bottom-left: Final comparison bar chart (reward, success%, partial%) |
| Bottom-right: Single-episode trace showing the RL agent's actions |
| """ |
|
|
| from __future__ import annotations |
|
|
| import argparse |
| import sys |
| from pathlib import Path |
|
|
| sys.path.insert(0, str(Path(__file__).resolve().parent.parent)) |
|
|
| import numpy as np |
| import matplotlib.pyplot as plt |
| import matplotlib.ticker as mticker |
|
|
| from lab_env.env import ( |
| LabEnv, |
| INITIAL_BUDGET, |
| ACTION_SETUP_START, |
| ACTION_SETUP_END, |
| ACTION_RUN_ASSAY, |
| ACTION_ORDER_TIPS, |
| ACTION_ORDER_BUFFER, |
| ACTION_ORDER_POLYMERASE, |
| ACTION_WAIT, |
| ACTION_FINISH, |
| PRESETS, |
| ) |
| from agents.naive_agent import NaiveAgent |
| from agents.rl_agent import ReinforceAgent |
|
|
|
|
def smooth(values: list[float], window: int = 50) -> np.ndarray:
    """Return the moving average of *values* over a sliding *window*.

    When there are fewer samples than the window size, the raw values
    are returned unchanged (as an array) instead of an empty result.
    """
    if len(values) < window:
        return np.array(values)
    # Uniform averaging kernel; "valid" mode yields len(values) - window + 1 points.
    kernel = np.full(window, 1.0 / window)
    return np.convolve(values, kernel, mode="valid")
|
|
|
|
def run_episode_naive(env: LabEnv, agent: NaiveAgent, seed: int) -> dict:
    """Roll out one complete episode with the naive agent.

    Returns a summary dict with the total reward, success/partial flags,
    elapsed minutes, budget spent, and step count.
    """
    obs, info = env.reset(seed=seed)
    agent.reset()

    total_reward = 0.0
    steps = 0
    done = False
    while not done:
        obs, reward, terminated, truncated, info = env.step(agent.select_action(obs))
        total_reward += reward
        steps += 1
        done = terminated or truncated

    best = info["best_result"]
    return {
        "reward": total_reward,
        "success": best == "success",
        "partial": best == "partial",
        "minutes": info["elapsed_minutes"],
        # Budget is drawn down during the episode; spend = initial - remaining.
        "cost": INITIAL_BUDGET - info["remaining_budget"],
        "steps": steps,
    }
|
|
|
|
def trace_rl_episode(env: LabEnv, agent: ReinforceAgent, seed: int) -> list[dict]:
    """Run one greedy (deterministic) episode and return a per-step trace.

    Each trace entry is a dict with keys ``action``, ``label``, ``result``,
    ``reward``, and ``minutes`` — consumed by the bottom-right panel of the
    visualization.  Mirrors the agent's own policy loop (restock -> setup ->
    assay -> finish) while recording every environment step.
    """
    obs, info = env.reset(seed=seed)
    agent.reset()
    trace: list[dict] = []
    # Initialize termination flags up front: previously these were only bound
    # inside the trial loop, so max_trials == 0 raised NameError at the final
    # finish check below.
    done = trunc = False

    for _trial in range(agent.max_trials):
        # Restock consumables first when the agent's heuristic flags low stock.
        if agent._inventory_low(obs):
            for act in (ACTION_ORDER_TIPS, ACTION_ORDER_BUFFER, ACTION_ORDER_POLYMERASE):
                obs, rew, done, trunc, info = env.step(act)
                trace.append({"action": "order", "label": "Order", "result": "", "reward": rew, "minutes": info["elapsed_minutes"]})
                # Check after every order so we never step a finished episode.
                if done or trunc:
                    return trace

        # Choose a setup preset greedily and describe it for the plot label.
        preset = agent._select_preset(obs, deterministic=True)
        p = PRESETS[preset]
        label = f"Setup {p['temp']}C/{p['cycles']}cy/{p['ratio'][:4]}"

        obs, rew, done, trunc, info = env.step(ACTION_SETUP_START + preset)
        trace.append({"action": "setup", "label": label, "result": "", "reward": rew, "minutes": info["elapsed_minutes"]})
        if done or trunc:
            return trace

        obs, rew, done, trunc, info = env.step(ACTION_RUN_ASSAY)
        trace.append({"action": "run", "label": "Run assay", "result": info["last_result"], "reward": rew, "minutes": info["elapsed_minutes"]})
        if done or trunc:
            return trace

        # Success: cash out immediately rather than burning more trials.
        if info.get("best_result") == "success":
            obs, rew, _, _, info = env.step(ACTION_FINISH)
            trace.append({"action": "finish", "label": "Finish", "result": "success", "reward": rew, "minutes": info["elapsed_minutes"]})
            return trace

    # Out of trials without a success: finish explicitly if the episode is
    # still live so the trace ends with the terminal reward.
    if not (done or trunc):
        obs, rew, _, _, info = env.step(ACTION_FINISH)
        trace.append({"action": "finish", "label": "Finish", "result": info["best_result"], "reward": rew, "minutes": info["elapsed_minutes"]})

    return trace
|
|
|
|
def main() -> None:
    """CLI entry point: train the REINFORCE agent, evaluate both agents,
    and render a 2x2 summary figure (see module docstring for panel layout)."""
    parser = argparse.ArgumentParser(description="Visualize training & evaluation")
    parser.add_argument("--train-episodes", type=int, default=2000)
    parser.add_argument("--eval-episodes", type=int, default=200)
    parser.add_argument("--seed", type=int, default=42)
    parser.add_argument("--save", type=str, default="", help="Save figure to path instead of showing")
    args = parser.parse_args()

    env = LabEnv()
    rl_agent = ReinforceAgent(max_trials=4)

    # ---- Training phase: one training update per episode. ----
    print(f"Training REINFORCE for {args.train_episodes} episodes...")
    train_rewards: list[float] = []
    train_successes: list[float] = []

    for ep in range(1, args.train_episodes + 1):
        # Distinct seed per episode so the agent sees varied initial states.
        result = rl_agent.run_episode(env, seed=args.seed + ep, train=True)
        train_rewards.append(result["reward"])
        train_successes.append(float(result["success"]))
        if ep % 500 == 0:
            print(f" ...episode {ep}/{args.train_episodes}")

    # ---- Evaluation phase: seed block chosen far from the training seeds
    # so both agents are scored on unseen episode configurations. ----
    print(f"Evaluating both agents for {args.eval_episodes} episodes...")
    eval_seed = 999_999
    naive_agent = NaiveAgent(num_trials=3, seed=0)

    # Both agents are evaluated on the SAME seed sequence for a fair comparison.
    rl_eval = [rl_agent.run_episode(env, seed=eval_seed + i, train=False) for i in range(args.eval_episodes)]
    naive_eval = [run_episode_naive(env, naive_agent, seed=eval_seed + i) for i in range(args.eval_episodes)]

    # Single deterministic rollout for the bottom-right trace panel.
    trace = trace_rl_episode(env, rl_agent, seed=12345)

    env.close()

    def agg(results):
        # Mean of each metric across the evaluation episodes.
        n = len(results)
        return {
            "reward": sum(r["reward"] for r in results) / n,
            "success": sum(r["success"] for r in results) / n,
            "partial": sum(r["partial"] for r in results) / n,
            "minutes": sum(r["minutes"] for r in results) / n,
        }

    rl_stats = agg(rl_eval)
    naive_stats = agg(naive_eval)

    # ---- Figure assembly: 2x2 grid as described in the module docstring. ----
    fig, axes = plt.subplots(2, 2, figsize=(14, 10))
    fig.suptitle("SimLab — Lab Automation RL Environment", fontsize=16, fontweight="bold")

    # Top-left: smoothed training reward curve.
    ax = axes[0, 0]
    smoothed_r = smooth(train_rewards, window=50)
    ax.plot(range(len(smoothed_r)), smoothed_r, color="#2563eb", linewidth=1.5)
    ax.axhline(y=0, color="gray", linestyle="--", alpha=0.5)
    ax.set_title("Training Reward (smoothed, window=50)")
    ax.set_xlabel("Episode")
    ax.set_ylabel("Total Episode Reward")
    ax.grid(True, alpha=0.3)

    # Top-right: smoothed success rate, scaled from [0,1] to percent.
    ax = axes[0, 1]
    smoothed_s = smooth(train_successes, window=100) * 100
    ax.plot(range(len(smoothed_s)), smoothed_s, color="#16a34a", linewidth=1.5)
    ax.set_title("Training Success Rate (smoothed, window=100)")
    ax.set_xlabel("Episode")
    ax.set_ylabel("Success %")
    ax.yaxis.set_major_formatter(mticker.PercentFormatter())
    ax.set_ylim(0, 100)
    ax.grid(True, alpha=0.3)

    # Bottom-left: grouped bar chart comparing the two agents on four metrics.
    ax = axes[1, 0]
    metrics = ["Avg Reward", "Success %", "Partial %", "Avg Time (min)"]
    rl_vals = [rl_stats["reward"], rl_stats["success"] * 100, rl_stats["partial"] * 100, rl_stats["minutes"]]
    naive_vals = [naive_stats["reward"], naive_stats["success"] * 100, naive_stats["partial"] * 100, naive_stats["minutes"]]

    x = np.arange(len(metrics))
    w = 0.35  # bar width; the two groups are offset by half a width each side
    bars_rl = ax.bar(x - w / 2, rl_vals, w, label="REINFORCE", color="#2563eb", edgecolor="white")
    bars_naive = ax.bar(x + w / 2, naive_vals, w, label="Naive", color="#f97316", edgecolor="white")
    ax.set_xticks(x)
    ax.set_xticklabels(metrics, fontsize=9)
    ax.set_title("Evaluation Comparison")
    ax.legend()
    ax.grid(True, alpha=0.3, axis="y")

    # Value labels just above each bar.
    for bar_group in (bars_rl, bars_naive):
        for bar in bar_group:
            h = bar.get_height()
            ax.annotate(f"{h:.1f}", xy=(bar.get_x() + bar.get_width() / 2, h),
                        xytext=(0, 4), textcoords="offset points",
                        ha="center", va="bottom", fontsize=8)

    # Bottom-right: horizontal timeline of the single traced episode, with
    # bars colored by assay outcome (green/yellow/red) or gray for non-assay steps.
    ax = axes[1, 1]
    if trace:
        y_labels = []
        colors = []
        for i, step in enumerate(trace):
            y_labels.append(step["label"])
            if step["result"] == "success":
                colors.append("#16a34a")
            elif step["result"] == "partial":
                colors.append("#eab308")
            elif step["result"] == "fail":
                colors.append("#dc2626")
            else:
                colors.append("#6b7280")

        y_pos = np.arange(len(trace))
        minutes = [s["minutes"] for s in trace]
        ax.barh(y_pos, minutes, color=colors, edgecolor="white", height=0.6)
        ax.set_yticks(y_pos)
        ax.set_yticklabels(y_labels, fontsize=8)
        # First step at the top, later steps below (chronological top-to-bottom).
        ax.invert_yaxis()
        ax.set_xlabel("Elapsed Minutes")
        ax.set_title("Single Episode Trace (RL Agent)")

        # Annotate outcome text at the end of each assay bar.
        for i, step in enumerate(trace):
            if step["result"] in ("success", "partial", "fail"):
                ax.annotate(step["result"], xy=(minutes[i], i),
                            xytext=(5, 0), textcoords="offset points",
                            va="center", fontsize=8, fontweight="bold",
                            color=colors[i])
    else:
        ax.text(0.5, 0.5, "No trace data", ha="center", va="center", transform=ax.transAxes)
        ax.set_title("Single Episode Trace (RL Agent)")

    # Leave headroom at the top for the suptitle.
    plt.tight_layout(rect=[0, 0, 1, 0.95])

    if args.save:
        fig.savefig(args.save, dpi=150, bbox_inches="tight")
        print(f"Saved to {args.save}")
    else:
        plt.show()

    # Console summary of the evaluation results.
    print()
    print(f" REINFORCE: reward={rl_stats['reward']:.1f} success={rl_stats['success']:.1%} time={rl_stats['minutes']:.0f}m")
    print(f" Naive: reward={naive_stats['reward']:.1f} success={naive_stats['success']:.1%} time={naive_stats['minutes']:.0f}m")
|
|
|
|
# Script entry point: only run when executed directly, not when imported.
if __name__ == "__main__":
    main()
|
|