maxxie114's picture
Initial HF Spaces deployment
80d8c84
import type {
EpisodeState,
ResetParams,
ScientistAction,
NegotiationMessage,
ScoreBreakdown,
JudgeAudit,
PaperSummary,
LabConstraints,
BackendResetResponse,
BackendStepResult,
BackendObservation,
BackendConversationEntry,
BackendRewardBreakdown,
BackendScenarioFamily,
ScenarioTemplate,
Difficulty,
EpisodeStepTrace,
BackendRuntimeStatus,
} from '@/types';
// Root path/URL for every REST call in this module. Taken from the
// VITE_API_BASE_URL build-time variable when it is set; otherwise the
// relative '/api' prefix is used.
const BASE_URL = import.meta.env.VITE_API_BASE_URL ?? '/api';
// WebSocket endpoint. Taken from VITE_WS_URL when it is set; otherwise derived
// from the current page: 'wss:' when the page is served over https, 'ws:'
// otherwise, on the same host, at the '/ws' path.
const WS_URL =
import.meta.env.VITE_WS_URL ??
`${window.location.protocol === 'https:' ? 'wss:' : 'ws:'}//${window.location.host}/ws`;
/**
 * Builds the user-facing hint shown when the API server cannot be reached.
 *
 * @param context Short description of the operation that failed; used as the
 *   message prefix.
 * @returns The context, the configured base URL, and the command that starts
 *   the local API server.
 */
function backendUnavailableMessage(context: string): string {
  const whatHappened = `${context}: backend unavailable at ${BASE_URL}.`;
  const howToFix =
    'Start the API server with "python -m uvicorn server.app:app --host 127.0.0.1 --port 7860" and refresh.';
  return whatHappened + ' ' + howToFix;
}
/**
 * Converts whatever was thrown during a backend call into an `Error`.
 *
 * - Non-`Error` values become `"<context>: unknown network error"`.
 * - Errors whose message is exactly one of the known "request never reached
 *   the server" messages are replaced by the backend-unavailable hint.
 * - Every other `Error` is returned unchanged.
 *
 * @param error The caught value.
 * @param context Short description of the failed operation.
 * @returns An `Error` suitable for rethrowing.
 */
function normalizeFetchError(error: unknown, context: string): Error {
  if (!(error instanceof Error)) {
    return new Error(`${context}: unknown network error`);
  }

  // The match is anchored on purpose. This module raises its own HTTP errors
  // with messages such as "Failed to fetch scenarios"; an unanchored
  // /Failed to fetch/ test would reclassify those as connectivity failures.
  // Alternatives cover Chromium, Firefox, Safari and Node (undici) wording.
  const transportFailure =
    /^(?:Failed to fetch|NetworkError when attempting to fetch resource\.?|Load failed|fetch failed)$/i;

  if (transportFailure.test(error.message.trim())) {
    return new Error(backendUnavailableMessage(context));
  }
  return error;
}
// ---------------------------------------------------------------------------
// Adapter helpers: transform backend shapes into frontend types
// ---------------------------------------------------------------------------
/**
 * Translates backend conversation entries into the frontend message shape.
 *
 * `round_number` is exposed as `round`, a null/undefined `action_type`
 * becomes `undefined`, and each message is stamped with the current client
 * time because the backend entries carry no timestamp.
 *
 * @param entries Conversation entries as returned by the backend.
 * @returns One frontend message per entry, in the same order.
 */
function adaptConversation(entries: BackendConversationEntry[]): NegotiationMessage[] {
  const messages: NegotiationMessage[] = [];
  for (const entry of entries) {
    const { role, round_number, action_type, message } = entry;
    messages.push({
      role: role as NegotiationMessage['role'],
      round: round_number,
      action_type: action_type ?? undefined,
      message,
      timestamp: Date.now(), // stamped client-side
    });
  }
  return messages;
}
/**
 * Flattens a backend reward breakdown into the frontend score shape.
 *
 * The backend's per-name penalty map is collapsed into a single summed
 * `penalties` value plus a list of `"name: value"` strings.
 *
 * @param rb Reward breakdown from the backend.
 * @param totalReward Value to report as `total_reward`.
 * @returns The score breakdown consumed by the UI.
 */
function adaptRewardBreakdown(rb: BackendRewardBreakdown, totalReward: number): ScoreBreakdown {
  let penaltySum = 0;
  const reasons: string[] = [];
  for (const [name, amount] of Object.entries(rb.penalties)) {
    penaltySum += amount;
    reasons.push(`${name}: ${amount}`);
  }

  const { rigor, feasibility, fidelity, parsimony, efficiency_bonus, communication_bonus } = rb;
  return {
    rigor,
    feasibility,
    fidelity,
    parsimony,
    total_reward: totalReward,
    efficiency_bonus,
    communication_bonus,
    penalties: penaltySum,
    penalty_reasons: reasons,
  };
}
/**
 * Builds the paper summary shown in the UI from the scientist's view of an
 * observation.
 *
 * The backend does not send the paper's original protocol separately, so the
 * `original_*` fields are filled from the scientist's current protocol when
 * one exists and from neutral defaults (0, 'N/A', empty list) otherwise.
 *
 * @param obs Backend observation; its `scientist` view is dereferenced
 *   without a null check.
 * @returns The paper summary.
 */
function adaptPaper(obs: BackendObservation): PaperSummary {
  const scientist = obs.scientist!;
  const proto = scientist.current_protocol;

  return {
    title: scientist.paper_title,
    hypothesis: scientist.paper_hypothesis,
    method: scientist.paper_method,
    key_finding: scientist.paper_key_finding,
    original_sample_size: proto?.sample_size ?? 0,
    original_technique: proto?.technique ?? 'N/A',
    original_controls: proto?.controls ?? [],
    original_duration_days: proto?.duration_days ?? 0,
  };
}
/**
 * Maps the lab manager's view of an observation onto the frontend
 * lab-constraints shape, renaming backend fields to their UI names.
 *
 * @param obs Backend observation; its `lab_manager` view is dereferenced
 *   without a null check.
 * @returns The lab constraints for display.
 */
function adaptLabConstraints(obs: BackendObservation): LabConstraints {
  const {
    budget_total: budget,
    budget_remaining,
    equipment_available,
    reagents_in_stock: reagents_available,
    staff_count,
    equipment_booked: booking_conflicts,
    safety_restrictions: safety_rules,
    time_limit_days,
  } = obs.lab_manager!;

  return {
    budget,
    budget_remaining,
    equipment_available,
    reagents_available,
    staff_count,
    booking_conflicts,
    safety_rules,
    time_limit_days,
  };
}
/**
 * Creates the initial frontend episode state from a freshly reset
 * observation.
 *
 * Round counters, protocol and conversation come from the scientist view;
 * the paper summary and lab constraints are adapted from the observation.
 * A new episode always starts not done, with no scores or judge audit, zero
 * cumulative reward and an empty step history.
 *
 * @param obs Observation returned by the backend.
 * @param sessionId Session identifier to store on the state.
 * @param episodeId Episode identifier to store on the state.
 * @param seed Seed the episode was created with.
 * @param template Scenario template the episode was created with.
 * @param difficulty Difficulty the episode was created with.
 * @returns The initial episode state.
 */
function observationToEpisodeState(
  obs: BackendObservation,
  sessionId: string,
  episodeId: string,
  seed: number,
  template: ScenarioTemplate,
  difficulty: Difficulty,
): EpisodeState {
  const { round_number, max_rounds, current_protocol, conversation_history } = obs.scientist!;
  const paper = adaptPaper(obs);
  const labConstraints = adaptLabConstraints(obs);

  return {
    episode_id: episodeId,
    session_id: sessionId,
    seed,
    template,
    difficulty,
    round: round_number,
    max_rounds,
    done: false,
    paper,
    lab_constraints: labConstraints,
    protocol: current_protocol,
    conversation: adaptConversation(conversation_history),
    scores: null,
    judge_audit: null,
    cumulative_reward: 0,
    step_history: [],
  };
}
/**
 * Summarises one backend step as a trace entry for the step history.
 *
 * The scientist and lab-manager messages are the first conversation entries
 * of each role whose round number equals `data.info.round`. Missing optional
 * values fall back to: previous cumulative reward plus this step's reward,
 * `'unknown'` action type, empty scientist message, empty reward components,
 * the previous protocol, and `null` for the oracle fields.
 *
 * @param prevState Episode state before the step.
 * @param data Step result returned by the backend.
 * @returns The trace entry for this round.
 */
function buildRoundTrace(
  prevState: EpisodeState,
  data: BackendStepResult,
): EpisodeStepTrace {
  const { info, reward } = data;
  const scientistView = data.observation?.scientist;
  const history = scientistView?.conversation_history ?? [];

  // First entry of the given role that belongs to the round just played.
  const firstInRound = (role: string) =>
    history.find((entry) => entry.round_number === info.round && entry.role === role);

  const fromScientist = firstInRound('scientist');
  const fromLabManager = firstInRound('lab_manager');

  return {
    round: info.round,
    reward,
    cumulative_reward: info.cumulative_reward ?? prevState.cumulative_reward + reward,
    action_type: fromScientist?.action_type ?? 'unknown',
    scientist_message: fromScientist?.message ?? '',
    lab_manager_action_type: fromLabManager?.action_type ?? undefined,
    lab_manager_message: fromLabManager?.message ?? undefined,
    step_reward_components: info.step_reward_components ?? {},
    protocol: scientistView?.current_protocol ?? prevState.protocol,
    oracle_round_score: info.oracle_round_score ?? null,
    oracle_post_mortem: info.oracle_post_mortem ?? null,
    oracle_event: info.oracle_event ?? null,
  };
}
// ---------------------------------------------------------------------------
// REST API functions
// ---------------------------------------------------------------------------
/**
 * Calls the backend `/health` endpoint.
 *
 * @returns The parsed JSON body of a successful response.
 * @throws Error with the HTTP status when the response is not OK, or the
 *   normalised error when the request itself fails.
 */
export async function healthCheck(): Promise<{ status: string; env?: string; version?: string }> {
  try {
    const response = await fetch(`${BASE_URL}/health`);
    if (response.ok) {
      return response.json();
    }
    throw new Error(`Health check failed: ${response.status}`);
  } catch (cause) {
    throw normalizeFetchError(cause, 'Health check failed');
  }
}
/**
 * Fetches the backend runtime status from `/runtime`.
 *
 * @returns The parsed JSON body of a successful response.
 * @throws Error `"Failed to fetch runtime status: <status>"` when the server
 *   answers with a non-OK status, or the normalised error when the request
 *   itself fails.
 */
export async function getRuntimeStatus(): Promise<BackendRuntimeStatus> {
  let res: Response;
  try {
    res = await fetch(`${BASE_URL}/runtime`);
  } catch (error) {
    throw normalizeFetchError(error, 'Failed to fetch runtime status');
  }

  // Checked outside the try block: the server did answer, so this HTTP error
  // must reach the caller with its status code rather than being passed
  // through the connectivity-failure normalisation.
  if (!res.ok) {
    throw new Error(`Failed to fetch runtime status: ${res.status}`);
  }
  return res.json();
}
/**
 * Fetches the available scenario families from `/scenarios`.
 *
 * @returns The `scenarios` property of the parsed JSON response.
 * @throws Error `"Failed to fetch scenarios: <status>"` when the server
 *   answers with a non-OK status, or the normalised error when the request
 *   itself fails.
 */
export async function getScenarios(): Promise<BackendScenarioFamily[]> {
  let res: Response;
  try {
    res = await fetch(`${BASE_URL}/scenarios`);
  } catch (error) {
    throw normalizeFetchError(error, 'Failed to fetch scenarios');
  }

  // Checked outside the try block: the server did answer, so this HTTP error
  // must reach the caller (with its status code) rather than being passed
  // through the connectivity-failure normalisation.
  if (!res.ok) {
    throw new Error(`Failed to fetch scenarios: ${res.status}`);
  }
  const data = await res.json();
  return data.scenarios;
}
/**
 * Starts a new episode via `POST /reset` and returns its initial state.
 *
 * Missing parameters default to a random integer seed in [0, 10000), the
 * 'math_reasoning' template and 'easy' difficulty. The template is sent to
 * the backend under the `scenario` key.
 *
 * @param params Requested seed, template and difficulty (all optional).
 * @returns The initial episode state built from the backend observation.
 * @throws Error containing the response body when the server answers with a
 *   non-OK status, or the normalised error for any other failure.
 */
export async function resetEpisode(params: ResetParams): Promise<EpisodeState> {
  const seed = params.seed ?? Math.floor(Math.random() * 10000);
  const template = params.template ?? 'math_reasoning';
  const difficulty = params.difficulty ?? 'easy';

  try {
    const payload = JSON.stringify({ seed, scenario: template, difficulty });
    const response = await fetch(`${BASE_URL}/reset`, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: payload,
    });

    if (!response.ok) {
      const details = await response.text();
      throw new Error(`Failed to reset episode: ${details}`);
    }

    const created: BackendResetResponse = await response.json();
    const { observation, session_id, episode_id } = created;
    return observationToEpisodeState(observation, session_id, episode_id, seed, template, difficulty);
  } catch (cause) {
    throw normalizeFetchError(cause, 'Failed to reset episode');
  }
}
/**
 * Sends a JSON POST to a step-style endpoint and returns the parsed result.
 *
 * Transport failures and body-read failures go through
 * `normalizeFetchError`. A non-OK HTTP response is raised afterwards, outside
 * the try block, so the server's own error text is never reinterpreted as a
 * connectivity problem.
 *
 * @param path Endpoint path appended to the API base URL.
 * @param payload JSON-serialisable request body.
 * @param failureContext Message prefix used for every error raised here.
 * @returns The backend step result.
 */
async function postStepRequest(
  path: string,
  payload: Record<string, unknown>,
  failureContext: string,
): Promise<BackendStepResult> {
  let result: BackendStepResult | null = null;
  let httpFailure: string | null = null;
  try {
    const res = await fetch(`${BASE_URL}${path}`, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify(payload),
    });
    if (res.ok) {
      result = await res.json();
    } else {
      httpFailure = await res.text();
    }
  } catch (error) {
    throw normalizeFetchError(error, failureContext);
  }
  if (httpFailure !== null) {
    throw new Error(`${failureContext}: ${httpFailure}`);
  }
  return result as BackendStepResult;
}

/**
 * Folds a backend step result into the previous episode state.
 *
 * Scores and the judge audit are only produced when the episode is done and
 * a reward breakdown is present. Each piece of state is taken from the new
 * observation when the corresponding view exists and carried over from
 * `prevState` otherwise.
 *
 * @param prevState Episode state before the step.
 * @param data Step result returned by the backend.
 * @returns The next episode state, with one more entry in `step_history`.
 */
function mergeStepResult(prevState: EpisodeState, data: BackendStepResult): EpisodeState {
  const info = data.info;

  let scores: ScoreBreakdown | null = null;
  let judgeAudit: JudgeAudit | null = null;
  if (data.done && info.reward_breakdown) {
    scores = adaptRewardBreakdown(info.reward_breakdown, data.reward);
    judgeAudit = {
      verdict: info.verdict ?? 'unknown',
      judge_notes: info.judge_notes ? [info.judge_notes] : [],
      top_failure_reasons: info.top_failure_reasons,
      score_breakdown: scores,
    };
  }

  const obs = data.observation;
  const conversation = obs?.scientist
    ? adaptConversation(obs.scientist.conversation_history)
    : prevState.conversation;
  const protocol = obs?.scientist?.current_protocol ?? prevState.protocol;
  const round = obs?.scientist?.round_number ?? prevState.round + 1;
  const cumulativeReward = info.cumulative_reward ?? prevState.cumulative_reward + data.reward;

  // Guard on the lab-manager view itself, not just on the observation:
  // adaptLabConstraints dereferences obs.lab_manager unconditionally and
  // would throw on an observation that carries only the scientist view.
  const labConstraints = obs?.lab_manager ? adaptLabConstraints(obs) : prevState.lab_constraints;

  const roundTrace = buildRoundTrace(prevState, data);

  return {
    ...prevState,
    round,
    done: data.done,
    protocol,
    conversation,
    lab_constraints: labConstraints,
    scores,
    judge_audit: judgeAudit,
    cumulative_reward: cumulativeReward,
    step_history: [...prevState.step_history, roundTrace],
  };
}

/**
 * Submits a scientist action via `POST /step` and returns the updated
 * episode state.
 *
 * @param sessionId Backend session to step.
 * @param action Scientist action to submit.
 * @param prevState Episode state before the step.
 * @returns The episode state after the step.
 * @throws Error containing the response body when the server answers with a
 *   non-OK status, or the normalised error for transport failures.
 */
export async function stepEpisode(
  sessionId: string,
  action: ScientistAction,
  prevState: EpisodeState,
): Promise<EpisodeState> {
  const data = await postStepRequest(
    '/step',
    { session_id: sessionId, action },
    'Failed to step episode',
  );
  return mergeStepResult(prevState, data);
}

/**
 * Asks the backend to produce and apply the scientist's next action itself
 * via `POST /agent-step`, then returns the updated episode state.
 *
 * @param sessionId Backend session to step.
 * @param prevState Episode state before the step.
 * @returns The episode state after the step.
 * @throws Error containing the response body when the server answers with a
 *   non-OK status, or the normalised error for transport failures.
 */
export async function agentStepEpisode(
  sessionId: string,
  prevState: EpisodeState,
): Promise<EpisodeState> {
  const data = await postStepRequest(
    '/agent-step',
    { session_id: sessionId },
    'Failed to run model-backed scientist step',
  );
  return mergeStepResult(prevState, data);
}
/**
 * Fetches the stored replay for an episode from `/replay/<episodeId>`.
 *
 * @param episodeId Episode identifier; percent-encoded before being placed
 *   in the URL path so characters such as `/`, `?` or `#` cannot alter the
 *   request target.
 * @returns The parsed JSON body of a successful response (shape not
 *   validated here).
 * @throws Error `"Failed to fetch replay: <status>"` when the server answers
 *   with a non-OK status, or the normalised error when the request itself
 *   fails.
 */
export async function getReplay(episodeId: string): Promise<unknown> {
  let res: Response;
  try {
    res = await fetch(`${BASE_URL}/replay/${encodeURIComponent(episodeId)}`);
  } catch (error) {
    throw normalizeFetchError(error, 'Failed to fetch replay');
  }

  // Checked outside the try block: the server did answer, so this HTTP error
  // must reach the caller (with its status code) rather than being passed
  // through the connectivity-failure normalisation.
  if (!res.ok) {
    throw new Error(`Failed to fetch replay: ${res.status}`);
  }
  return res.json();
}
// ---------------------------------------------------------------------------
// WebSocket support
// ---------------------------------------------------------------------------
/**
 * Messages this client may send over the WebSocket (see `sendWsMessage`).
 * Discriminated by `type`: `reset` carries reset parameters, `step` carries
 * a scientist action, and `state` carries no payload.
 */
export type WebSocketMessage =
| { type: 'reset'; params: ResetParams }
| { type: 'step'; action: ScientistAction }
| { type: 'state' };
/**
 * Messages handed to the `onMessage` callback of `createWebSocket`.
 * Discriminated by `type`. Incoming frames are only JSON-parsed and cast to
 * this type, not validated at runtime.
 * NOTE(review): there is a `pong` response but no matching `ping` request in
 * `WebSocketMessage`, and no response variant for the `state` request;
 * confirm against the server protocol.
 */
export type WebSocketResponse =
| { type: 'reset_ok'; episode_id: string; observation: BackendObservation }
| { type: 'step_ok'; observation: BackendObservation | null; reward: number; done: boolean; info: Record<string, unknown> }
| { type: 'pong' }
| { type: 'error'; message: string };
/**
 * Opens a WebSocket to the configured endpoint and wires up the callbacks.
 *
 * Each incoming frame is JSON-parsed and passed to `onMessage`. Frames that
 * cannot be parsed are logged with `console.error` and dropped. Only the
 * parsing step is guarded: an exception thrown by `onMessage` itself is not
 * caught here, so it is not mislabelled as a parse failure or silently lost.
 *
 * @param onMessage Called with every successfully parsed message.
 * @param onOpen Optional; called when the connection opens.
 * @param onClose Optional; called when the connection closes.
 * @param onError Optional; called with the socket's error event.
 * @returns The newly created socket.
 */
export function createWebSocket(
  onMessage: (msg: WebSocketResponse) => void,
  onOpen?: () => void,
  onClose?: () => void,
  onError?: (err: Event) => void,
): WebSocket {
  const ws = new WebSocket(WS_URL);
  ws.onopen = () => onOpen?.();
  ws.onclose = () => onClose?.();
  ws.onerror = (e) => onError?.(e);
  ws.onmessage = (event) => {
    let msg: WebSocketResponse;
    try {
      msg = JSON.parse(event.data) as WebSocketResponse;
    } catch {
      console.error('Failed to parse WebSocket message:', event.data);
      return;
    }
    // Dispatched outside the try so handler bugs surface as handler bugs.
    onMessage(msg);
  };
  return ws;
}
/**
 * Sends a message as JSON if the socket is open.
 *
 * When the socket is not in the OPEN state nothing is sent; the return value
 * lets callers detect the drop instead of it being silent.
 *
 * @param ws Socket to send on.
 * @param msg Message to serialise and send.
 * @returns `true` when the message was handed to the socket, `false` when it
 *   was dropped because the socket is not open.
 */
export function sendWsMessage(ws: WebSocket, msg: WebSocketMessage): boolean {
  if (ws.readyState !== WebSocket.OPEN) {
    return false;
  }
  ws.send(JSON.stringify(msg));
  return true;
}
// ---------------------------------------------------------------------------
// Default scientist action (for auto-step)
// ---------------------------------------------------------------------------
/**
 * Builds the scientist action used when stepping automatically.
 *
 * With an existing protocol the action is a `revise_protocol` that reuses the
 * protocol's technique, controls, equipment and reagents where they are set.
 * Without one it is a `propose_protocol` seeded from the paper summary, the
 * scenario template and the first available equipment/reagent.
 *
 * - Duration is clamped to between 1 day and the lab's time limit (3 days
 *   when no state is given).
 * - From round 1 onwards the sample size is the protocol's value (3 if
 *   absent) plus 1 on even rounds or minus 1 on odd rounds, clamped to
 *   between 3 and 12. Before that it is 4 for 'math_reasoning' and 3
 *   otherwise.
 *
 * @param state Current episode state, if any.
 * @returns The action to submit.
 */
export function buildDefaultScientistAction(state?: EpisodeState): ScientistAction {
  const labWindow = Math.max(1, state?.lab_constraints.time_limit_days ?? 3);
  const scenario = state?.template;
  const existing = state?.protocol;

  // Duration: current protocol, else the paper's (when non-zero), else the lab window.
  const paperDuration = state?.paper.original_duration_days ?? 0;
  const wantedDuration = existing?.duration_days ?? (paperDuration || labWindow);
  const boundedDuration = Math.max(1, Math.min(labWindow, wantedDuration));

  // Technique: only consulted when the current protocol does not name one.
  const fallbackTechnique = () => {
    const paperTechnique = state?.paper.original_technique;
    if (paperTechnique && paperTechnique !== 'N/A') {
      return paperTechnique;
    }
    if (scenario === 'math_reasoning') {
      return 'structured_proof_check';
    }
    if (scenario === 'finance_trading') {
      return 'offline_backtest';
    }
    return 'published_training_recipe';
  };
  const technique = existing?.technique ?? fallbackTechnique();

  // Controls: first non-empty list among protocol, paper, and a bare baseline.
  let controls = ['baseline'];
  if (existing?.controls.length) {
    controls = existing.controls;
  } else if (state?.paper.original_controls.length) {
    controls = state.paper.original_controls;
  }

  let sampleSize: number;
  if (state?.round && state.round > 0) {
    const startingSize = existing?.sample_size ?? 3;
    const nudge = state.round % 2 === 0 ? 1 : -1;
    sampleSize = Math.max(3, Math.min(startingSize + nudge, 12));
  } else {
    sampleSize = scenario === 'math_reasoning' ? 4 : 3;
  }

  const requiredEquipment = existing?.required_equipment.length
    ? existing.required_equipment
    : (state?.lab_constraints.equipment_available.slice(0, 1) ?? []);
  const requiredReagents = existing?.required_reagents.length
    ? existing.required_reagents
    : (state?.lab_constraints.reagents_available.slice(0, 1) ?? []);

  const rationale = existing
    ? `Refine the existing protocol for round ${state?.round ?? 0} while staying inside the ${labWindow}-day lab window.`
    : `Replicate the source result within the available lab window of ${labWindow} days using currently available resources.`;

  return {
    action_type: existing ? 'revise_protocol' : 'propose_protocol',
    sample_size: sampleSize,
    controls,
    technique,
    duration_days: boundedDuration,
    required_equipment: requiredEquipment,
    required_reagents: requiredReagents,
    questions: [],
    rationale,
  };
}
/**
 * Builds the action that accepts the protocol currently on the table.
 *
 * Every protocol field is left at its empty value (0, empty string, or a
 * fresh empty list); only `action_type` is meaningful.
 *
 * @returns An `accept` scientist action.
 */
export function buildAcceptAction(): ScientistAction {
  const blankProtocolFields = {
    sample_size: 0,
    controls: [],
    technique: '',
    duration_days: 0,
    required_equipment: [],
    required_reagents: [],
    questions: [],
    rationale: '',
  };
  return { action_type: 'accept', ...blankProtocolFields };
}