Spaces:

arminfg
/

biosim

Sleeping

App Files Files Community

biosim / server /app.py

arminfg

SimLab: lab automation RL env, OpenEnv adapter, Training UI, agents

da63ca8 2 months ago

raw

history blame contribute delete

25 kB

	"""
	FastAPI server bridging the LabEnv Python backend to the Next.js frontend.

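	Endpoints:
	POST /api/training/start — train the agent (SSE stream)
	POST /api/run/ai — run one AI-agent episode
	POST /api/run/naive — run one naive-agent episode
	POST /api/run/research — run one Research LLM agent episode (PCR only)
	POST /api/run/research-generate — run one Research & Generate agent episode
	POST /api/env/reset — reset environment
	POST /api/env/step — take one step
	GET /api/stats — dashboard aggregate stats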
	"""

	from __future__ import annotations

	import json
	import sys
	import time
	from pathlib import Path
	from typing import Any

	from fastapi import FastAPI, Request
	from fastapi.middleware.cors import CORSMiddleware
	from fastapi.responses import StreamingResponse
	from pydantic import BaseModel

	sys.path.insert(0, str(Path(__file__).resolve().parent.parent))

	from lab_env.env import (
	LabEnv,
	INITIAL_BUDGET,
	ACTION_SETUP_START,
	ACTION_RUN_ASSAY,
	ACTION_ORDER_TIPS,
	ACTION_ORDER_BUFFER,
	ACTION_ORDER_POLYMERASE,
	ACTION_WAIT,
	ACTION_FINISH,
	)
	from lab_env.spec import pcr_experiment_spec, get_spec_for_workflow
	from agents.naive_agent import NaiveAgent
	from agents.rl_agent import ReinforceAgent

	# Cache of environments keyed by workflow id, filled on first use.
	# The RL agent is shared and trained on PCR.
	_envs: dict[str, LabEnv] = {}

	# The research agents are optional. When a module cannot be imported its class
	# name is bound to None and the matching HAS_* flag is False.
	try:
	    from agents.research_llm_agent import ResearchLLMAgent
	except ImportError:
	    ResearchLLMAgent = None
	    HAS_RESEARCH_AGENT = False
	else:
	    HAS_RESEARCH_AGENT = True

	try:
	    from agents.research_generate_agent import ResearchGenerateAgent
	except ImportError:
	    ResearchGenerateAgent = None
	    HAS_RESEARCH_GENERATE_AGENT = False
	else:
	    HAS_RESEARCH_GENERATE_AGENT = True

	# FastAPI application. The CORS middleware accepts any origin, method and header.
	app = FastAPI(title="SimLab API")
	app.add_middleware(
	    CORSMiddleware,
	    allow_origins=["*"],
	    allow_methods=["*"],
	    allow_headers=["*"],
	)

	# Most recently trained (or lazily created) agent; /api/run/ai falls back to it
	# when no agent is registered for the requested workflow.
	rl_agent: ReinforceAgent | None = None
	# workflow_id -> agent (for UI per-protocol training)
	_trained_agents: dict[str, ReinforceAgent] = {}
	# Records read by /api/stats (it looks up "best_result" and "cost").
	# NOTE(review): no code visible in this file appends to this list — confirm
	# whether run recording lives elsewhere or is missing.
	run_history: list[dict] = []


	def _get_env(workflow_id: str) -> LabEnv:
	    """Return the cached LabEnv for ``workflow_id``, building it on first request.

	    A missing environment is constructed from
	    ``get_spec_for_workflow(workflow_id)`` and stored in the module-level
	    ``_envs`` cache before being returned.
	    """
	    if workflow_id in _envs:
	        return _envs[workflow_id]
	    _envs[workflow_id] = LabEnv(spec=get_spec_for_workflow(workflow_id))
	    return _envs[workflow_id]


	# ──────────────────────────────────────────────
	# Request / response models
	# ──────────────────────────────────────────────

	class TrainRequest(BaseModel):
	    """Request body for POST /api/training/start."""
	    episodes: int = 2000  # number of training episodes to run
	    lr: float = 3e-3  # forwarded to ReinforceAgent as ``lr``
	    max_trials: int = 4  # forwarded to ReinforceAgent as ``max_trials``
	    eval_episodes: int = 100  # evaluation episodes run per agent after training
	    workflow_id: str = "pcr-amplification"  # resolved with get_spec_for_workflow

	class StepRequest(BaseModel):
	    """Request body for POST /api/env/step."""
	    action: int  # passed unchanged to ``env.step``
	    workflow_id: str = "pcr-amplification"  # selects the cached environment

	class RunRequest(BaseModel):
	    """Request body shared by the /api/run/* endpoints and /api/env/reset."""
	    seed: int = 42  # seed handed to ``env.reset`` / the agent's episode
	    workflow_id: str = "pcr-amplification"  # selects the cached environment


	# ──────────────────────────────────────────────
	# Helpers
	# ──────────────────────────────────────────────

	def _env_state_dict(env: LabEnv) -> dict[str, Any]:
	    """Return a snapshot of ``env`` for the step-by-step endpoints.

	    The per-step values are copied from ``env._info()``. ``max_time`` and
	    ``max_budget`` are read from ``env.spec`` (``max_minutes`` and
	    ``initial_budget``), falling back to 240 and 500 when the spec does not
	    define them. This matches the "state" payload of ``_build_run_result``
	    instead of hard-coding the fallback values for every workflow.
	    """
	    info = env._info()
	    spec = getattr(env, "spec", None)
	    return {
	        "step_index": info["step_index"],
	        "elapsed_minutes": info["elapsed_minutes"],
	        "remaining_budget": info["remaining_budget"],
	        "inventory": info["inventory"],
	        "last_result": info["last_result"],
	        "best_result": info["best_result"],
	        "max_time": getattr(spec, "max_minutes", 240),
	        "max_budget": getattr(spec, "initial_budget", 500),
	    }


	def _trace_episode(env: LabEnv, agent: ReinforceAgent, seed: int) -> dict:
	    """Play one episode with the RL agent and record a UI timeline of each step.

	    For each of ``agent.max_trials`` trials the agent issues all three order
	    actions when it reports low inventory, sets up its deterministically
	    selected preset and runs the assay. The episode stops early when the
	    environment reports done/truncated, or with ACTION_FINISH as soon as the
	    best result is "success". Otherwise ACTION_FINISH is issued after the last
	    trial. Returns the payload built by ``_build_run_result``.
	    """
	    preset_table = env.spec.presets
	    obs, info = env.reset(seed=seed)
	    agent.reset()
	    events: list[dict] = []
	    outcomes: dict[int, str] = {}

	    def record(title: str, description: str, status: str, icon: str) -> None:
	        # The timestamp is taken from the info dict of the latest env.step().
	        events.append({
	            "title": title,
	            "description": description,
	            "time": f"{info['elapsed_minutes']:.0f} min",
	            "status": status,
	            "icon": icon,
	        })

	    def finished() -> dict:
	        return _build_run_result(env, info, events, outcomes)

	    for _trial in range(agent.max_trials):
	        if agent._inventory_low(obs):
	            for order in (ACTION_ORDER_TIPS, ACTION_ORDER_BUFFER, ACTION_ORDER_POLYMERASE):
	                obs, _reward, terminated, truncated, info = env.step(order)
	                record("Order Reagents", _order_label(order), "action", "order")
	                if terminated or truncated:
	                    return finished()

	        choice = agent._select_preset(obs, deterministic=True)
	        setup_label = _preset_label(preset_table[choice])

	        obs, _reward, terminated, truncated, info = env.step(ACTION_SETUP_START + choice)
	        record("Setup", setup_label, "pending", "setup")
	        if terminated or truncated:
	            return finished()

	        obs, _reward, terminated, truncated, info = env.step(ACTION_RUN_ASSAY)
	        outcome = info["last_result"]
	        outcomes[choice] = outcome
	        record("Run Assay", _result_description(outcome), outcome, "run")
	        if terminated or truncated:
	            return finished()

	        if info.get("best_result") == "success":
	            # No need to spend further trials once a success has been seen.
	            obs, _reward, _term, _trunc, info = env.step(ACTION_FINISH)
	            record("Finish", "Experiment complete — success!", "success", "finish")
	            return finished()

	    # All trials used without a success: close the episode explicitly.
	    obs, _reward, _term, _trunc, info = env.step(ACTION_FINISH)
	    best = info["best_result"]
	    record(
	        "Finish",
	        f"Experiment complete — best: {best}",
	        best if best in ("success", "partial") else "fail",
	        "finish",
	    )
	    return finished()


	def _trace_naive_episode(env: LabEnv, agent: NaiveAgent, seed: int) -> dict:
	    """Run one naive-agent episode and produce a step-by-step timeline.

	    The agent is asked for an action until the environment reports
	    done/truncated. Setup, run-assay, order and finish actions each add one
	    timeline entry; any other action (for example waiting) is executed but
	    not shown. Returns the payload built by ``_build_run_result``.

	    Each assay result is attributed to the preset that was most recently set
	    up, so the per-preset status table is filled in the same way as in
	    ``_trace_episode``. Previously the table was never populated and every
	    preset was reported as "untried".
	    """
	    presets = env.spec.presets
	    num_presets = len(presets)
	    obs, info = env.reset(seed=seed)
	    agent.reset()
	    timeline: list[dict] = []
	    presets_tried: dict[int, str] = {}
	    # Index of the preset configured by the latest setup action, if any.
	    active_preset: int | None = None

	    while True:
	        action = agent.select_action(obs)
	        obs, _reward, done, trunc, info = env.step(action)
	        elapsed = f"{info['elapsed_minutes']:.0f} min"

	        if ACTION_SETUP_START <= action < ACTION_SETUP_START + num_presets:
	            active_preset = action - ACTION_SETUP_START
	            timeline.append({
	                "title": "Setup",
	                "description": _preset_label(presets[active_preset]),
	                "time": elapsed,
	                "status": "pending",
	                "icon": "setup",
	            })
	        elif action == ACTION_RUN_ASSAY:
	            result = info["last_result"]
	            if active_preset is not None:
	                presets_tried[active_preset] = result
	            timeline.append({
	                "title": "Run Assay",
	                "description": _result_description(result),
	                "time": elapsed,
	                "status": result,
	                "icon": "run",
	            })
	        elif action in (ACTION_ORDER_TIPS, ACTION_ORDER_BUFFER, ACTION_ORDER_POLYMERASE):
	            timeline.append({
	                "title": "Order Reagents",
	                "description": _order_label(action),
	                "time": elapsed,
	                "status": "action",
	                "icon": "order",
	            })
	        elif action == ACTION_FINISH:
	            best = info["best_result"]
	            timeline.append({
	                "title": "Finish",
	                "description": f"Experiment complete — best: {best}",
	                "time": elapsed,
	                "status": best if best in ("success", "partial") else "fail",
	                "icon": "finish",
	            })

	        if done or trunc:
	            break

	    return _build_run_result(env, info, timeline, presets_tried)


	def _build_run_result(env: LabEnv, info: dict, timeline: list[dict], presets_tried: dict[int, str]) -> dict:
	    """Assemble the JSON payload returned by the /api/run/* endpoints.

	    Args:
	        env: Environment the episode ran in; its spec supplies the presets and
	            the ``max_minutes`` / ``initial_budget`` limits (240 / 500 when the
	            spec does not define them).
	        info: Final info dict of the episode.
	        timeline: Timeline entries collected while the episode ran.
	        presets_tried: Maps preset index to the result obtained with it;
	            presets that are absent are reported as "untried".

	    Returns:
	        A dict with "state", "timeline", "presets", "reward" and
	        "best_result" keys.
	    """
	    spec = env.spec
	    preset_statuses = []
	    for i, p in enumerate(spec.presets):
	        row: dict[str, Any] = {
	            "id": str(i),
	            "status": presets_tried.get(i, "untried"),
	            "label": _preset_label(p),
	        }
	        if "temp" in p:
	            row["temp"] = p["temp"]
	            # Presets with a coating time also carry "temp" (see
	            # _preset_label) but may lack the PCR-only fields, so copy those
	            # only when present instead of raising KeyError.
	            for key in ("cycles", "ratio"):
	                if key in p:
	                    row[key] = p[key]
	        if "coating_hr" in p:
	            row["coating_hr"] = p["coating_hr"]
	            row["block"] = p.get("block", "")
	        preset_statuses.append(row)
	    return {
	        "state": {
	            "elapsed_minutes": info["elapsed_minutes"],
	            "remaining_budget": info["remaining_budget"],
	            "inventory": info["inventory"],
	            "best_result": info["best_result"],
	            "max_time": getattr(spec, "max_minutes", 240),
	            "max_budget": getattr(spec, "initial_budget", 500),
	        },
	        "timeline": timeline,
	        "presets": preset_statuses,
	        # NOTE(review): despite its name this is the budget spent
	        # (INITIAL_BUDGET minus what remains), not an RL return — confirm that
	        # the frontend expects this.
	        "reward": float(INITIAL_BUDGET - info["remaining_budget"]),
	        "best_result": info["best_result"],
	    }


	def _result_description(result: str) -> str:
	    """Translate an assay result code into the text shown in the timeline.

	    Codes other than "success", "partial" and "fail" are returned unchanged.
	    """
	    descriptions = {
	        "success": "Success!",
	        "partial": "Partial — low yield",
	        "fail": "Failed — no amplification",
	    }
	    if result in descriptions:
	        return descriptions[result]
	    return result


	def _order_label(action: int) -> str:
	    """Return the timeline text for an order action ("reagents" if unrecognised)."""
	    quantities = {
	        ACTION_ORDER_TIPS: "+5 tips",
	        ACTION_ORDER_BUFFER: "+5 buffer",
	        ACTION_ORDER_POLYMERASE: "+3 polymerase",
	    }
	    if action in quantities:
	        return quantities[action]
	    return "reagents"


	def _preset_label(preset: dict) -> str:
	    """Describe a preset for the timeline/UI.

	    Presets with a "coating_hr" key use the coating layout (their "temp" key
	    is required, "block" defaults to an empty string). All other presets use
	    the temp/cycles/ratio layout with "?" for missing values.
	    """
	    if "coating_hr" not in preset:
	        temp = preset.get("temp", "?")
	        cycles = preset.get("cycles", "?")
	        ratio = preset.get("ratio", "?")
	        return f"{temp}°C / {cycles} cyc / {ratio}"
	    coating = preset["coating_hr"]
	    temp = preset["temp"]
	    blocker = preset.get("block", "")
	    return f"{coating}hr coat / {temp}°C / {blocker}"


	def _trace_research_episode(env: LabEnv, seed: int, max_trials: int = 5) -> dict:
	    """Run the Research LLM agent and build its timeline. PCR only.

	    Each step reported by the agent contributes four entries: Research →
	    Hypothesis → Run Assay → Learn. When the agent is unavailable or the
	    workflow is not PCR, a single "fail" entry explaining why is returned
	    instead. The timeline is built after the whole episode has run, so every
	    entry carries the final elapsed time.
	    """
	    presets = env.spec.presets

	    def refusal(title: str, description: str) -> dict:
	        notice = {"title": title, "description": description, "time": "0 min", "status": "fail", "icon": "run"}
	        return _build_run_result(env, env._info(), [notice], {})

	    if not HAS_RESEARCH_AGENT:
	        return refusal("Research agent unavailable", "Install openai and set OPENAI_API_KEY")
	    if env.spec.name != "pcr":
	        return refusal("Research agent", "Research agent is only supported for PCR workflow.")

	    agent = ResearchLLMAgent(max_trials=max_trials)
	    steps: list[dict] = []
	    # The agent appends one dict per trial to the list passed as callback.
	    agent.run_episode(env, seed=seed, episode_callback=steps)
	    info = env._info()
	    timeline: list[dict] = []
	    presets_tried: dict[int, str] = {}

	    def add(title: str, description: str, status: str, icon: str) -> None:
	        timeline.append({
	            "title": title,
	            "description": description,
	            "time": f"{info.get('elapsed_minutes', 0):.0f} min",
	            "status": status,
	            "icon": icon,
	        })

	    for step in steps:
	        full_text = step.get("research") or ""
	        summary = full_text[:200]
	        if len(full_text) > 200:
	            summary += "..."
	        add("Research", summary or "Literature search for PCR protocol", "action", "research")

	        hyp = step.get("hypothesis") or {}
	        add(
	            "Hypothesis",
	            f"temp={hyp.get('temp', '?')}°C, cycles={hyp.get('cycles', '?')}, ratio={hyp.get('ratio', '?')}",
	            "pending",
	            "hypothesis",
	        )

	        used = step.get("params_used") or {}
	        outcome = step.get("result", "fail")
	        add("Run Assay", _result_description(outcome), outcome, "run")

	        # Credit the first preset whose parameters equal the ones actually run.
	        for index, candidate in enumerate(presets):
	            if all(candidate.get(key) == used.get(key) for key in ("temp", "cycles", "ratio")):
	                presets_tried[index] = outcome
	                break

	        add(
	            "Learn",
	            f"temp_range={agent.knowledge.get('temp_range', [])}, cycle_range={agent.knowledge.get('cycle_range', [])}",
	            "action",
	            "learn",
	        )

	    return _build_run_result(env, info, timeline, presets_tried)


	def _protocol_dict_label(protocol: dict) -> str:
	    """Describe a generated protocol dict, using "?" for any missing value.

	    Protocols with a "coating_hr" key use the coating/temp/block layout. All
	    others use the temp/cycles/ratio layout.
	    """
	    def field(name: str):
	        return protocol.get(name, "?")

	    if "coating_hr" not in protocol:
	        return f"{field('temp')}°C / {field('cycles')} cyc / {field('ratio')}"
	    return f"{field('coating_hr')}hr / {field('temp')}°C / {field('block')}"


	def _trace_research_generate_episode(env: LabEnv, seed: int, max_trials: int = 6) -> dict:
	    """Run the Research & Generate agent and report every protocol it tried.

	    Requires the agent to be importable and ``env.spec.evaluate_custom_protocol``
	    to be set; otherwise a single "fail" timeline entry explaining why is
	    returned. On success the "presets" list holds one row per generated
	    protocol (taken from ``agent.feedback_history``) rather than the spec's
	    presets. The timeline is built after the episode, so every entry carries
	    the final elapsed time.
	    """
	    def unavailable(title: str, description: str) -> dict:
	        notice = {"title": title, "description": description, "time": "0 min", "status": "fail", "icon": "run"}
	        return _build_run_result(env, env._info(), [notice], {})

	    if not HAS_RESEARCH_GENERATE_AGENT:
	        return unavailable("Research & Generate agent unavailable", "Install openai and set OPENAI_API_KEY")
	    if env.spec.evaluate_custom_protocol is None:
	        return unavailable("Research & Generate", "This workflow does not support custom protocols.")

	    agent = ResearchGenerateAgent(max_trials=max_trials)
	    agent.run_episode(env, seed=seed, verbose=False)
	    info = env._info()

	    def clock() -> str:
	        return f"{info.get('elapsed_minutes', 0):.0f} min"

	    timeline: list[dict] = []
	    rows: list[dict[str, Any]] = []
	    for index, attempt in enumerate(agent.feedback_history):
	        protocol = attempt.get("protocol", {})
	        outcome = attempt.get("result", "fail")
	        label = _protocol_dict_label(protocol)
	        timeline.append({
	            "title": "Research & Generate",
	            "description": f"Generated: {label}",
	            "time": clock(),
	            "status": "pending",
	            "icon": "research",
	        })
	        timeline.append({
	            "title": "Run Assay",
	            "description": _result_description(outcome),
	            "time": clock(),
	            "status": outcome,
	            "icon": "run",
	        })
	        row: dict[str, Any] = {"id": str(index), "status": outcome, "label": label}
	        if "temp" in protocol:
	            row.update(
	                temp=protocol.get("temp"),
	                cycles=protocol.get("cycles"),
	                ratio=protocol.get("ratio", ""),
	            )
	        if "coating_hr" in protocol:
	            row.update(
	                coating_hr=protocol.get("coating_hr"),
	                block=protocol.get("block", ""),
	            )
	        rows.append(row)

	    timeline.append({
	        "title": "Finish",
	        "description": f"Best result: {info.get('best_result', 'none')}",
	        "time": clock(),
	        "status": info["best_result"] if info["best_result"] in ("success", "partial") else "fail",
	        "icon": "finish",
	    })

	    state = {
	        "elapsed_minutes": info["elapsed_minutes"],
	        "remaining_budget": info["remaining_budget"],
	        "inventory": info["inventory"],
	        "best_result": info["best_result"],
	        "max_time": getattr(env.spec, "max_minutes", 240),
	        "max_budget": getattr(env.spec, "initial_budget", 500),
	    }
	    return {
	        "state": state,
	        "timeline": timeline,
	        "presets": rows,
	        "reward": float(INITIAL_BUDGET - info["remaining_budget"]),
	        "best_result": info["best_result"],
	    }


	# ──────────────────────────────────────────────
	# Training endpoint (SSE stream)
	# ──────────────────────────────────────────────

	@app.post("/api/training/start")
	async def training_start(req: TrainRequest):
	    """Train a new ReinforceAgent for ``req.workflow_id`` and stream progress via SSE.

	    The response is a ``text/event-stream`` of JSON payloads:

	    * ``"type": "progress"`` — sent every ``max(episodes // 40, 10)`` episodes
	      and after the last one. It carries the mean reward and success rate of
	      the episodes since the previous event, plus the chart data so far.
	    * ``"type": "done"`` — sent once at the end with the full chart data and a
	      comparison against a NaiveAgent evaluated on the same seeds.

	    When training completes the agent is stored in ``rl_agent`` and in
	    ``_trained_agents[req.workflow_id]`` for later /api/run/ai calls.
	    """

	    def generate():
	        global rl_agent
	        spec = get_spec_for_workflow(req.workflow_id)
	        agent = ReinforceAgent(lr=req.lr, max_trials=req.max_trials, spec=spec)
	        train_env = LabEnv(spec=spec)
	        chart_data: list[dict] = []

	        # try/finally so the environment is closed even if training fails or
	        # the generator is closed before the stream finishes.
	        try:
	            window_rewards: list[float] = []
	            window_successes: list[float] = []
	            log_interval = max(req.episodes // 40, 10)

	            for ep in range(1, req.episodes + 1):
	                result = agent.run_episode(train_env, seed=42 + ep, train=True)
	                window_rewards.append(result["reward"])
	                window_successes.append(float(result["success"]))

	                if ep % log_interval == 0 or ep == req.episodes:
	                    avg_reward = sum(window_rewards) / len(window_rewards)
	                    avg_success = sum(window_successes) / len(window_successes) * 100
	                    chart_data.append({
	                        "episode": ep,
	                        "reward": round(avg_reward, 2),
	                        "successRate": round(avg_success, 1),
	                    })
	                    event = {
	                        "type": "progress",
	                        "episode": ep,
	                        "total": req.episodes,
	                        "progress": round(ep / req.episodes * 100),
	                        "reward": round(avg_reward, 2),
	                        "successRate": round(avg_success, 1),
	                        "chartData": chart_data,
	                    }
	                    yield f"data: {json.dumps(event)}\n\n"
	                    window_rewards.clear()
	                    window_successes.clear()

	            # Publish the agent only after training has completed.
	            rl_agent = agent
	            _trained_agents[req.workflow_id] = agent

	            # Evaluate both agents on the same seeds, disjoint from training.
	            eval_seed = 999_999
	            rl_results = [
	                agent.run_episode(train_env, seed=eval_seed + i, train=False)
	                for i in range(req.eval_episodes)
	            ]
	            naive = NaiveAgent(num_trials=3, seed=0)
	            naive_results = []
	            for i in range(req.eval_episodes):
	                obs, info = train_env.reset(seed=eval_seed + i)
	                naive.reset()
	                total_r = 0.0
	                while True:
	                    a = naive.select_action(obs)
	                    obs, r, d, t, info = train_env.step(a)
	                    total_r += r
	                    if d or t:
	                        break
	                naive_results.append({
	                    "reward": total_r,
	                    "success": info["best_result"] == "success",
	                    "partial": info["best_result"] == "partial",
	                    "minutes": info["elapsed_minutes"],
	                    # NOTE(review): 500.0 is hard-coded here while other code
	                    # uses INITIAL_BUDGET — confirm they are the same value.
	                    "cost": 500.0 - info["remaining_budget"],
	                })
	        finally:
	            train_env.close()

	        def agg(res, n):
	            # With eval_episodes == 0 there is nothing to average; report
	            # zeros instead of dividing by zero.
	            if n == 0:
	                return dict.fromkeys(("reward", "success", "partial", "minutes", "cost"), 0.0)
	            return {
	                "reward": round(sum(r["reward"] for r in res) / n, 1),
	                "success": round(sum(r["success"] for r in res) / n * 100, 1),
	                "partial": round(sum(r["partial"] for r in res) / n * 100, 1),
	                "minutes": round(sum(r["minutes"] for r in res) / n, 0),
	                "cost": round(sum(r["cost"] for r in res) / n, 1),
	            }

	        rl_s = agg(rl_results, len(rl_results))
	        nv_s = agg(naive_results, len(naive_results))

	        def imp(rl_v, nv_v):
	            # Relative change in percent; None when the reference value is 0.
	            if nv_v == 0:
	                return None
	            return round((rl_v - nv_v) / abs(nv_v) * 100)

	        # (label, stats key, unit, lower_is_better). For time and cost a lower
	        # value is better, so the arguments to imp() are swapped.
	        metrics = (
	            ("Avg Reward", "reward", "", False),
	            ("Success Rate", "success", "%", False),
	            ("Partial Rate", "partial", "%", False),
	            ("Avg Time", "minutes", "min", True),
	            ("Avg Cost", "cost", "$", True),
	        )
	        comparison = []
	        for label, key, unit, lower_is_better in metrics:
	            rl_v, nv_v = rl_s[key], nv_s[key]
	            comparison.append({
	                "metric": label,
	                "reinforce": rl_v,
	                "baseline": nv_v,
	                "improvement": imp(nv_v, rl_v) if lower_is_better else imp(rl_v, nv_v),
	                "unit": unit,
	            })

	        final_event = {
	            "type": "done",
	            "chartData": chart_data,
	            "comparison": comparison,
	        }
	        yield f"data: {json.dumps(final_event)}\n\n"

	    return StreamingResponse(generate(), media_type="text/event-stream")


	# ──────────────────────────────────────────────
	# Run endpoints
	# ──────────────────────────────────────────────

	@app.post("/api/run/ai")
	async def run_ai(req: RunRequest):
	    """Run one traced RL-agent episode for the requested workflow.

	    Uses the agent registered for the workflow, else the shared ``rl_agent``.
	    When neither exists a new ReinforceAgent is created and registered in
	    both places before the episode runs.
	    """
	    global rl_agent
	    env = _get_env(req.workflow_id)
	    agent = _trained_agents.get(req.workflow_id) or rl_agent
	    if agent is None:
	        agent = ReinforceAgent(max_trials=4, spec=get_spec_for_workflow(req.workflow_id))
	        rl_agent = agent
	        _trained_agents[req.workflow_id] = agent
	    return _trace_episode(env, agent, seed=req.seed)


	@app.post("/api/run/naive")
	async def run_naive(req: RunRequest):
	    """Run one traced episode with a new NaiveAgent seeded from the request."""
	    return _trace_naive_episode(
	        _get_env(req.workflow_id),
	        NaiveAgent(num_trials=3, seed=req.seed),
	        seed=req.seed,
	    )


	@app.post("/api/run/research")
	async def run_research(req: RunRequest):
	    """Run one Research LLM agent episode (at most 5 trials). PCR workflow only."""
	    return _trace_research_episode(
	        _get_env(req.workflow_id),
	        seed=req.seed,
	        max_trials=5,
	    )


	@app.post("/api/run/research-generate")
	async def run_research_generate(req: RunRequest):
	    """Run one Research & Generate agent episode (at most 6 trials).

	    Works for any workflow whose spec provides ``evaluate_custom_protocol``.
	    """
	    return _trace_research_generate_episode(
	        _get_env(req.workflow_id),
	        seed=req.seed,
	        max_trials=6,
	    )


	# ──────────────────────────────────────────────
	# Step-by-step endpoint
	# ──────────────────────────────────────────────

	@app.post("/api/env/reset")
	async def env_reset(req: RunRequest):
	    """Reset the workflow's cached environment with ``req.seed`` and return its state."""
	    target = _get_env(req.workflow_id)
	    _obs, _info = target.reset(seed=req.seed)
	    return _env_state_dict(target)


	@app.post("/api/env/step")
	async def env_step(req: StepRequest):
	    """Apply ``req.action`` to the workflow's cached environment.

	    Returns the environment state from ``_env_state_dict`` plus the step's
	    ``reward``, ``terminated`` and ``truncated`` values. The two flags are
	    coerced to plain ``bool``, as ``reward`` already is to ``float``, so that
	    the payload contains only built-in JSON types whatever the environment
	    returns.
	    """
	    env = _get_env(req.workflow_id)
	    _obs, reward, terminated, truncated, _info = env.step(req.action)
	    return {
	        **_env_state_dict(env),
	        "reward": float(reward),
	        "terminated": bool(terminated),
	        "truncated": bool(truncated),
	    }


	# ──────────────────────────────────────────────
	# Stats endpoint
	# ──────────────────────────────────────────────

	@app.get("/api/stats")
	async def get_stats():
	    """Summarise ``run_history`` for the dashboard.

	    Reports the number of recorded runs, the share whose "best_result" is
	    "success" and the sum of their "cost" values. With no recorded runs the
	    success rate is "—" and the budget spent is "$0".
	    """
	    total = len(run_history)
	    if not total:
	        return {
	            "active_workflows": 1,
	            "total_experiments": 0,
	            "success_rate": "—",
	            "budget_spent": "$0",
	        }
	    wins = len([run for run in run_history if run.get("best_result") == "success"])
	    spent = sum(run.get('cost', 0) for run in run_history)
	    return {
	        "active_workflows": 1,
	        "total_experiments": total,
	        "success_rate": f"{wins / total:.0%}",
	        "budget_spent": f"${spent:.0f}",
	    }


	if __name__ == "__main__":
	    # Running this file directly serves ``app`` with uvicorn on all
	    # interfaces (0.0.0.0), port 8000.
	    import uvicorn
	    uvicorn.run(app, host="0.0.0.0", port=8000)