---
title: ReplicaLab Architecture
---
flowchart TB
%% Scenario generation stage: the paper templates (PT) and the constraint
%% generator (CG) both feed the scenario engine (SE).
%% NOTE(review): SE also links out of this subgraph (the reset(seed) edge to
%% ENV). Mermaid documents that a subgraph's own direction is ignored when one
%% of its nodes links outside it, so LR may not take effect - confirm in the
%% rendered output.
subgraph SCENARIO["Scenario Generation"]
direction LR
PT["Paper Templates
Cell Bio · ML Benchmark · Psych"]
CG["Constraint Generator
Equipment · Budget · Staff · Calendar"]
SE["Scenario Engine
Seed → Deterministic World"]
%% Both inputs converge on the scenario engine.
PT --> SE
CG --> SE
end
%% Environment subgraph: groups the environment state node (STATE), the
%% agent loop (AGENTS) and the judge engine (JUDGE), laid out top to bottom.
subgraph ENV["ReplicaLab Environment (OpenEnv)"]
direction TB
%% Single state node; its label lists the fields shown for the environment.
STATE["Environment State
Paper · Constraints · Round · Budget
Protocol · History · Done Flag"]
%% Agent loop: the Scientist (SCI) and the Lab Manager (LM) exchange
%% labelled messages in both directions.
%% NOTE(review): SCI is also the target of a link from TRAINING defined
%% outside this subgraph, so the LR direction here may be ignored by Mermaid
%% - confirm in the rendered output.
subgraph AGENTS["Agent Loop"]
direction LR
SCI["🔬 Scientist Agent
Trainable LLM Policy
Actions: propose · revise
ask · accept"]
LM["🏗️ Lab Manager Agent
Rule-Based Policy
Actions: report · suggest
reject · accept"]
%% Scientist -> Lab Manager: proposals and questions.
SCI -- "Proposal /
Question" --> LM
%% Lab Manager -> Scientist: constraints and substitutions.
LM -- "Constraint /
Substitution" --> SCI
end
%% Judge engine: the rubric scorer feeds the explanation layer.
subgraph JUDGE["Judge Engine"]
direction LR
RUBRIC["Rubric Scorer
Deterministic"]
EXPLAIN["Explanation Layer
Optional LLM"]
RUBRIC --> EXPLAIN
end
%% State is handed to the agent loop, the loop writes back via step(), and
%% the "Episode End" edge routes the state to the judge.
STATE --> AGENTS
AGENTS -- "step()" --> STATE
STATE -- "Episode End" --> JUDGE
end
%% Reward computation: three score nodes (R, FE, FI), a bonus node (BONUS)
%% and a penalty node (PEN) all point at TOTAL, whose label spells out the
%% combination formula.
%% NOTE(review): TOTAL links out of this subgraph (to TRAINING), so Mermaid
%% may ignore the LR direction declared here - confirm in the rendered output.
subgraph REWARD["Reward Computation"]
direction LR
R["Rigor
Score"]
FE["Feasibility
Score"]
FI["Fidelity
Score"]
BONUS["Efficiency +
Communication
Bonus"]
PEN["Penalties
Timeout · Over Budget
Missing Controls"]
TOTAL["Total Reward
10 × R × Fe × Fi
+ Bonus − Penalties"]
%% Every component is drawn with the same plain arrow into TOTAL; the sign
%% of each term is only conveyed by the TOTAL label.
R --> TOTAL
FE --> TOTAL
FI --> TOTAL
BONUS --> TOTAL
PEN --> TOTAL
end
%% RL training pipeline: a three-node chain from the Colab node (COLAB)
%% through the rollout loop (ROLLOUT) to the reward curves (CURVES).
%% NOTE(review): ROLLOUT links out of this subgraph (to ENV), so Mermaid may
%% ignore the LR direction declared here - confirm in the rendered output.
subgraph TRAINING["RL Training Pipeline"]
direction LR
COLAB["Google Colab
TRL / Unsloth · GRPO"]
ROLLOUT["Rollout Loop
reset() → step() → reward"]
CURVES["Reward Curves
Before vs After"]
%% Chained link: COLAB to ROLLOUT, then ROLLOUT to CURVES.
COLAB --> ROLLOUT --> CURVES
end
%% Deployment and serving: a three-node chain from the FastAPI/WebSocket
%% server (FASTAPI) through the Docker container (DOCKER) to the Hugging Face
%% Space (HF).
%% NOTE(review): FASTAPI links out of this subgraph (to REACT and FALLBACK),
%% so Mermaid may ignore the LR direction declared here - confirm in the
%% rendered output.
subgraph SERVING["Deployment & Serving"]
direction LR
FASTAPI["FastAPI +
WebSocket Server"]
DOCKER["Docker Container"]
HF["Hugging Face Space
sdk: docker · port: 7860"]
%% Chained link: FASTAPI to DOCKER, then DOCKER to HF.
FASTAPI --> DOCKER --> HF
end
%% Frontend: two entry nodes (REACT and FALLBACK) that both point at the
%% shared PANELS layout subgraph.
%% NOTE(review): REACT and FALLBACK are targets of links from FASTAPI outside
%% this subgraph, so Mermaid may ignore the LR direction declared here -
%% confirm in the rendered output.
subgraph UI["Frontend"]
direction LR
REACT["React + Vite UI"]
FALLBACK["OpenEnv /web
Fallback"]
%% Layout subgraph: three panel nodes with no links between them; each label
%% lists what that panel shows.
subgraph PANELS["Layout"]
direction TB
LEFT["Left Panel
Paper · Seed · Round"]
MID["Middle Panel
Negotiation Log"]
RIGHT["Right Panel
Protocol · Budget · Scores"]
end
%% Both frontends link to the layout subgraph as a whole, not to
%% individual panels.
REACT --> PANELS
FALLBACK --> PANELS
end
%% Cross-subgraph wiring. Several of these edges start or end on a subgraph
%% id (ENV, REWARD, TRAINING) rather than on a single node.
%% Scenario engine -> environment, labelled reset(seed).
SE -- "reset(seed)" --> ENV
%% Judge engine -> reward computation.
JUDGE -- "Scores" --> REWARD
%% Total reward -> training pipeline.
TOTAL -- "Reward Signal" --> TRAINING
%% Rollout loop -> environment.
ROLLOUT -- "Episodes" --> ENV
%% Environment -> FastAPI server, then the server to both frontends.
ENV -- "API" --> FASTAPI
FASTAPI -- "WebSocket" --> REACT
FASTAPI -- "WebSocket" --> FALLBACK
%% Dotted edge (the only one in the diagram): training pipeline back to the
%% Scientist agent node inside the environment.
TRAINING -. "Updated Scientist
Policy Weights" .-> SCI
%% Style classes: one per diagram area, each setting fill, stroke and text
%% colour. They take effect only through the class statements that assign
%% them to node ids.
classDef scenario fill:#3b82f6,stroke:#1d4ed8,color:#fff
classDef env fill:#1e293b,stroke:#475569,color:#e2e8f0
classDef agent fill:#8b5cf6,stroke:#6d28d9,color:#fff
classDef judge fill:#f59e0b,stroke:#d97706,color:#1e293b
classDef reward fill:#10b981,stroke:#059669,color:#fff
classDef training fill:#ef4444,stroke:#dc2626,color:#fff
classDef serving fill:#6366f1,stroke:#4f46e5,color:#fff
classDef ui fill:#ec4899,stroke:#db2777,color:#fff
classDef panel fill:#fdf2f8,stroke:#ec4899,color:#1e293b
%% Assign the style classes to nodes. Only leaf nodes are listed; no
%% subgraph id is given a class here.
class PT,CG,SE scenario
%% STATE is the only node using the env class.
class STATE env
class SCI,LM agent
class RUBRIC,EXPLAIN judge
class R,FE,FI,BONUS,PEN,TOTAL reward
class COLAB,ROLLOUT,CURVES training
class FASTAPI,DOCKER,HF serving
class REACT,FALLBACK ui
class LEFT,MID,RIGHT panel