// Spaces: openenv-community / replicalab (Running)
// File size: 15,818 Bytes · 80d8c84

import type {
  EpisodeState,
  ResetParams,
  ScientistAction,
  NegotiationMessage,
  ScoreBreakdown,
  JudgeAudit,
  PaperSummary,
  LabConstraints,
  BackendResetResponse,
  BackendStepResult,
  BackendObservation,
  BackendConversationEntry,
  BackendRewardBreakdown,
  BackendScenarioFamily,
  ScenarioTemplate,
  Difficulty,
  EpisodeStepTrace,
  BackendRuntimeStatus,
} from '@/types';

// Base URL prefixed to every REST call in this module. Taken from the
// VITE_API_BASE_URL environment variable when it is set; otherwise the
// relative '/api' path is used.
const BASE_URL = import.meta.env.VITE_API_BASE_URL ?? '/api';
// WebSocket endpoint used by createWebSocket. Taken from VITE_WS_URL when it is
// set; otherwise derived from the current page location: 'wss:' when the page
// was loaded over https, 'ws:' otherwise, on the same host, at the '/ws' path.
const WS_URL =
  import.meta.env.VITE_WS_URL ??
  `${window.location.protocol === 'https:' ? 'wss:' : 'ws:'}//${window.location.host}/ws`;

/**
 * Builds the user-facing hint shown when the API server cannot be reached.
 *
 * @param context - Short description of the operation that failed; it becomes
 *   the prefix of the message.
 * @returns A sentence naming the configured base URL and the command that
 *   starts the API server.
 */
function backendUnavailableMessage(context: string): string {
  const hint = `${context}: backend unavailable at ${BASE_URL}. Start the API server with "python -m uvicorn server.app:app --host 127.0.0.1 --port 7860" and refresh.`;
  return hint;
}

/**
 * Converts whatever a failed request threw into an `Error` suitable for display.
 *
 * - A transport-level failure (the request never produced a response) becomes
 *   the "backend unavailable" hint for `context`.
 * - Any other `Error` is returned unchanged, so HTTP-status and parsing errors
 *   keep their original message.
 * - Non-`Error` values become a generic "unknown network error".
 *
 * @param error - The value caught from the failed request.
 * @param context - Short description of the operation, used as message prefix.
 * @returns The error to rethrow.
 */
function normalizeFetchError(error: unknown, context: string): Error {
  if (!(error instanceof Error)) {
    return new Error(`${context}: unknown network error`);
  }

  // The whole message must match. This module throws its own errors such as
  // "Failed to fetch runtime status: 500"; a substring test on "Failed to fetch"
  // would misreport those HTTP failures as an unreachable backend.
  // The alternatives are the texts used for network failures by Chromium,
  // Firefox, Safari and Node's built-in fetch respectively.
  const networkFailure =
    /^(?:Failed to fetch|NetworkError when attempting to fetch resource\.?|Load failed|fetch failed)$/i;

  return networkFailure.test(error.message.trim())
    ? new Error(backendUnavailableMessage(context))
    : error;
}

// ---------------------------------------------------------------------------
// Adapter helpers: transform backend shapes into frontend types
// ---------------------------------------------------------------------------

/**
 * Converts backend conversation entries into frontend negotiation messages.
 *
 * `round_number` is exposed as `round`, and a nullish `action_type` becomes
 * `undefined`.
 *
 * @param entries - Conversation history as returned by the backend.
 * @returns One message per entry, in the same order.
 */
function adaptConversation(entries: BackendConversationEntry[]): NegotiationMessage[] {
  const toMessage = (entry: BackendConversationEntry): NegotiationMessage => {
    const { role, round_number, action_type, message } = entry;
    return {
      role: role as NegotiationMessage['role'],
      round: round_number,
      action_type: action_type ?? undefined,
      message,
      // The backend sends no timestamps, so stamp with the local clock.
      timestamp: Date.now(),
    };
  };

  return entries.map(toMessage);
}

/**
 * Flattens a backend reward breakdown into the frontend score shape.
 *
 * The per-reason `penalties` map is collapsed into a single summed value, and
 * each entry is also rendered as a `"name: value"` string in `penalty_reasons`.
 *
 * @param rb - Reward breakdown from the backend.
 * @param totalReward - Reward to report as `total_reward`.
 * @returns The score breakdown for display.
 */
function adaptRewardBreakdown(rb: BackendRewardBreakdown, totalReward: number): ScoreBreakdown {
  let penaltySum = 0;
  const reasons: string[] = [];
  for (const [name, amount] of Object.entries(rb.penalties)) {
    penaltySum += amount;
    reasons.push(`${name}: ${amount}`);
  }

  const { rigor, feasibility, fidelity, parsimony, efficiency_bonus, communication_bonus } = rb;
  return {
    rigor,
    feasibility,
    fidelity,
    parsimony,
    total_reward: totalReward,
    efficiency_bonus,
    communication_bonus,
    penalties: penaltySum,
    penalty_reasons: reasons,
  };
}

/**
 * Builds the paper summary from the scientist's view of an observation.
 *
 * The backend does not send the paper's original protocol separately, so the
 * `original_*` fields are taken from the scientist's current protocol, with
 * placeholder defaults (0, 'N/A', []) when no protocol is present.
 *
 * @param obs - Backend observation; its `scientist` part is assumed to be set.
 * @returns The paper summary for the frontend.
 */
function adaptPaper(obs: BackendObservation): PaperSummary {
  const scientist = obs.scientist!;
  const protocol = scientist.current_protocol;

  return {
    title: scientist.paper_title,
    hypothesis: scientist.paper_hypothesis,
    method: scientist.paper_method,
    key_finding: scientist.paper_key_finding,
    original_sample_size: protocol?.sample_size ?? 0,
    original_technique: protocol?.technique ?? 'N/A',
    original_controls: protocol?.controls ?? [],
    original_duration_days: protocol?.duration_days ?? 0,
  };
}

/**
 * Maps the lab manager's view of an observation onto the frontend's
 * lab-constraint field names.
 *
 * @param obs - Backend observation; its `lab_manager` part is assumed to be set.
 * @returns The lab constraints for the frontend.
 */
function adaptLabConstraints(obs: BackendObservation): LabConstraints {
  const {
    budget_total,
    budget_remaining,
    equipment_available,
    reagents_in_stock,
    staff_count,
    equipment_booked,
    safety_restrictions,
    time_limit_days,
  } = obs.lab_manager!;

  return {
    budget: budget_total,
    budget_remaining,
    equipment_available,
    reagents_available: reagents_in_stock,
    staff_count,
    booking_conflicts: equipment_booked,
    safety_rules: safety_restrictions,
    time_limit_days,
  };
}

/**
 * Creates the initial frontend episode state from a reset observation.
 *
 * The episode starts not done, with no scores or judge audit, zero cumulative
 * reward and an empty step history.
 *
 * @param obs - Observation returned by the backend; `scientist` is assumed set.
 * @param sessionId - Session identifier returned by the backend.
 * @param episodeId - Episode identifier returned by the backend.
 * @param seed - Seed that was sent with the reset request.
 * @param template - Scenario template that was requested.
 * @param difficulty - Difficulty that was requested.
 * @returns A fresh episode state.
 */
function observationToEpisodeState(
  obs: BackendObservation,
  sessionId: string,
  episodeId: string,
  seed: number,
  template: ScenarioTemplate,
  difficulty: Difficulty,
): EpisodeState {
  const scientist = obs.scientist!;

  const initial: EpisodeState = {
    // Identity of the run.
    episode_id: episodeId,
    session_id: sessionId,
    seed,
    template,
    difficulty,
    // Progress as reported by the scientist's view.
    round: scientist.round_number,
    max_rounds: scientist.max_rounds,
    done: false,
    // Adapted backend payloads.
    paper: adaptPaper(obs),
    lab_constraints: adaptLabConstraints(obs),
    protocol: scientist.current_protocol,
    conversation: adaptConversation(scientist.conversation_history),
    // Nothing has been scored yet.
    scores: null,
    judge_audit: null,
    cumulative_reward: 0,
    step_history: [],
  };
  return initial;
}

/**
 * Summarizes one step result as a trace entry for the step history.
 *
 * The scientist and lab-manager messages are the first conversation entries
 * whose `round_number` equals `data.info.round`. Values missing from the
 * result fall back to the previous state (protocol, cumulative reward) or to
 * neutral defaults ('unknown', '', {}, null).
 *
 * @param prevState - Episode state before this step.
 * @param data - Step result returned by the backend.
 * @returns The trace for the round reported in `data.info.round`.
 */
function buildRoundTrace(
  prevState: EpisodeState,
  data: BackendStepResult,
): EpisodeStepTrace {
  const { info, reward } = data;
  const round = info.round;
  const scientistView = data.observation?.scientist;
  const history = scientistView?.conversation_history ?? [];

  // First entry of this round spoken by the given role, if any.
  const firstEntryBy = (role: string) =>
    history.find((entry) => entry.round_number === round && entry.role === role);
  const fromScientist = firstEntryBy('scientist');
  const fromLabManager = firstEntryBy('lab_manager');

  return {
    round,
    reward,
    cumulative_reward: info.cumulative_reward ?? prevState.cumulative_reward + reward,
    action_type: fromScientist?.action_type ?? 'unknown',
    scientist_message: fromScientist?.message ?? '',
    lab_manager_action_type: fromLabManager?.action_type ?? undefined,
    lab_manager_message: fromLabManager?.message ?? undefined,
    step_reward_components: info.step_reward_components ?? {},
    protocol: scientistView?.current_protocol ?? prevState.protocol,
    oracle_round_score: info.oracle_round_score ?? null,
    oracle_post_mortem: info.oracle_post_mortem ?? null,
    oracle_event: info.oracle_event ?? null,
  };
}

// ---------------------------------------------------------------------------
// REST API functions
// ---------------------------------------------------------------------------

/**
 * Calls the backend's `/health` endpoint.
 *
 * @returns The parsed JSON body of the health response.
 * @throws Error when the response status is not OK (the message includes the
 *   status code), or whatever `normalizeFetchError` produces for a failed
 *   request or an unreadable body.
 */
export async function healthCheck(): Promise<{ status: string; env?: string; version?: string }> {
  try {
    const res = await fetch(`${BASE_URL}/health`);
    if (!res.ok) {
      throw new Error(`Health check failed: ${res.status}`);
    }
    // Await here rather than returning the promise: a rejection while reading
    // the body must land in the catch below so it is normalized like any
    // other request failure.
    return await res.json();
  } catch (error) {
    throw normalizeFetchError(error, 'Health check failed');
  }
}

/**
 * Fetches the backend's `/runtime` endpoint.
 *
 * @returns The parsed JSON body of the runtime-status response.
 * @throws Error when the response status is not OK (the message includes the
 *   status code), or whatever `normalizeFetchError` produces for a failed
 *   request or an unreadable body.
 */
export async function getRuntimeStatus(): Promise<BackendRuntimeStatus> {
  try {
    const res = await fetch(`${BASE_URL}/runtime`);
    if (!res.ok) {
      throw new Error(`Failed to fetch runtime status: ${res.status}`);
    }
    // Await here rather than returning the promise: a rejection while reading
    // the body must land in the catch below so it is normalized like any
    // other request failure.
    return await res.json();
  } catch (error) {
    throw normalizeFetchError(error, 'Failed to fetch runtime status');
  }
}

/**
 * Fetches the backend's `/scenarios` endpoint.
 *
 * @returns The `scenarios` property of the parsed JSON body.
 * @throws Error when the response status is not OK (the message includes the
 *   status code, matching the other GET helpers in this module), or whatever
 *   `normalizeFetchError` produces for a failed request.
 */
export async function getScenarios(): Promise<BackendScenarioFamily[]> {
  try {
    const res = await fetch(`${BASE_URL}/scenarios`);
    if (!res.ok) {
      // Include the status so a server-side failure can be told apart from
      // other causes when the message is surfaced.
      throw new Error(`Failed to fetch scenarios: ${res.status}`);
    }
    const data = await res.json();
    return data.scenarios;
  } catch (error) {
    throw normalizeFetchError(error, 'Failed to fetch scenarios');
  }
}

/**
 * Starts a new episode by POSTing to `/reset` and adapts the response into
 * the frontend's episode state.
 *
 * Missing parameters are defaulted: a random integer seed in [0, 10000), the
 * 'math_reasoning' template and 'easy' difficulty. The template is sent to
 * the backend under the `scenario` key.
 *
 * @param params - Optional seed, template and difficulty.
 * @returns The initial state of the new episode.
 * @throws Error containing the response text when the status is not OK, or
 *   whatever `normalizeFetchError` produces for a failed request.
 */
export async function resetEpisode(params: ResetParams): Promise<EpisodeState> {
  const seed = params.seed ?? Math.floor(Math.random() * 10000);
  const template = params.template ?? 'math_reasoning';
  const difficulty = params.difficulty ?? 'easy';

  try {
    const payload = JSON.stringify({ seed, scenario: template, difficulty });
    const res = await fetch(`${BASE_URL}/reset`, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: payload,
    });

    if (res.ok) {
      const data: BackendResetResponse = await res.json();
      const { observation, session_id, episode_id } = data;
      return observationToEpisodeState(
        observation,
        session_id,
        episode_id,
        seed,
        template,
        difficulty,
      );
    }

    // Non-OK: surface the response text in the error.
    const text = await res.text();
    throw new Error(`Failed to reset episode: ${text}`);
  } catch (error) {
    throw normalizeFetchError(error, 'Failed to reset episode');
  }
}

/**
 * Submits a scientist action to `/step` and folds the result into the
 * episode state.
 *
 * Fields missing from the response fall back to `prevState`: conversation,
 * protocol and lab constraints are kept, the round is incremented by one, and
 * the cumulative reward is the previous total plus this step's reward.
 * `scores` and `judge_audit` are populated only when the step reports `done`
 * together with a reward breakdown; otherwise both are null.
 *
 * @param sessionId - Session the action belongs to.
 * @param action - The scientist action to submit.
 * @param prevState - Episode state before this step; it is not mutated.
 * @returns A new episode state with one more entry in `step_history`.
 * @throws Error containing the response text when the status is not OK, or
 *   whatever `normalizeFetchError` produces for a failed request.
 */
export async function stepEpisode(
  sessionId: string,
  action: ScientistAction,
  prevState: EpisodeState,
): Promise<EpisodeState> {
  let data: BackendStepResult;
  try {
    const res = await fetch(`${BASE_URL}/step`, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({ session_id: sessionId, action }),
    });
    if (!res.ok) {
      const text = await res.text();
      throw new Error(`Failed to step episode: ${text}`);
    }
    data = await res.json();
  } catch (error) {
    throw normalizeFetchError(error, 'Failed to step episode');
  }
  const info = data.info;

  // Build scores if done and reward breakdown is available
  let scores: ScoreBreakdown | null = null;
  let judgeAudit: JudgeAudit | null = null;
  if (data.done && info.reward_breakdown) {
    scores = adaptRewardBreakdown(info.reward_breakdown, data.reward);
    judgeAudit = {
      verdict: info.verdict ?? 'unknown',
      judge_notes: info.judge_notes ? [info.judge_notes] : [],
      top_failure_reasons: info.top_failure_reasons,
      score_breakdown: scores,
    };
  }

  // Get updated conversation from the observation
  const obs = data.observation;
  const scientist = obs?.scientist;
  const conversation = scientist
    ? adaptConversation(scientist.conversation_history)
    : prevState.conversation;
  const protocol = scientist?.current_protocol ?? prevState.protocol;
  const round = scientist?.round_number ?? prevState.round + 1;
  const cumulativeReward = info.cumulative_reward ?? prevState.cumulative_reward + data.reward;

  // Update lab constraints only when the lab manager's view is present.
  // adaptLabConstraints dereferences obs.lab_manager unconditionally, so an
  // observation without it must fall back to the previous constraints, just
  // as a missing scientist view does above.
  const labConstraints = obs?.lab_manager ? adaptLabConstraints(obs) : prevState.lab_constraints;
  const roundTrace = buildRoundTrace(prevState, data);

  return {
    ...prevState,
    round,
    done: data.done,
    protocol,
    conversation,
    lab_constraints: labConstraints,
    scores,
    judge_audit: judgeAudit,
    cumulative_reward: cumulativeReward,
    step_history: [...prevState.step_history, roundTrace],
  };
}

/**
 * Asks the backend to take the next scientist step itself (`/agent-step`)
 * and folds the result into the episode state.
 *
 * Only the session id is sent; no action is supplied by the client. Fields
 * missing from the response fall back to `prevState`: conversation, protocol
 * and lab constraints are kept, the round is incremented by one, and the
 * cumulative reward is the previous total plus this step's reward. `scores`
 * and `judge_audit` are populated only when the step reports `done` together
 * with a reward breakdown; otherwise both are null.
 *
 * @param sessionId - Session to advance.
 * @param prevState - Episode state before this step; it is not mutated.
 * @returns A new episode state with one more entry in `step_history`.
 * @throws Error containing the response text when the status is not OK, or
 *   whatever `normalizeFetchError` produces for a failed request.
 */
export async function agentStepEpisode(
  sessionId: string,
  prevState: EpisodeState,
): Promise<EpisodeState> {
  let data: BackendStepResult;
  try {
    const res = await fetch(`${BASE_URL}/agent-step`, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({ session_id: sessionId }),
    });
    if (!res.ok) {
      const text = await res.text();
      throw new Error(`Failed to run model-backed scientist step: ${text}`);
    }
    data = await res.json();
  } catch (error) {
    throw normalizeFetchError(error, 'Failed to run model-backed scientist step');
  }
  const info = data.info;

  // Build scores if done and reward breakdown is available
  let scores: ScoreBreakdown | null = null;
  let judgeAudit: JudgeAudit | null = null;
  if (data.done && info.reward_breakdown) {
    scores = adaptRewardBreakdown(info.reward_breakdown, data.reward);
    judgeAudit = {
      verdict: info.verdict ?? 'unknown',
      judge_notes: info.judge_notes ? [info.judge_notes] : [],
      top_failure_reasons: info.top_failure_reasons,
      score_breakdown: scores,
    };
  }

  const obs = data.observation;
  const scientist = obs?.scientist;
  const conversation = scientist
    ? adaptConversation(scientist.conversation_history)
    : prevState.conversation;
  const protocol = scientist?.current_protocol ?? prevState.protocol;
  const round = scientist?.round_number ?? prevState.round + 1;
  const cumulativeReward = info.cumulative_reward ?? prevState.cumulative_reward + data.reward;

  // Update lab constraints only when the lab manager's view is present.
  // adaptLabConstraints dereferences obs.lab_manager unconditionally, so an
  // observation without it must fall back to the previous constraints, just
  // as a missing scientist view does above.
  const labConstraints = obs?.lab_manager ? adaptLabConstraints(obs) : prevState.lab_constraints;
  const roundTrace = buildRoundTrace(prevState, data);

  return {
    ...prevState,
    round,
    done: data.done,
    protocol,
    conversation,
    lab_constraints: labConstraints,
    scores,
    judge_audit: judgeAudit,
    cumulative_reward: cumulativeReward,
    step_history: [...prevState.step_history, roundTrace],
  };
}

/**
 * Fetches the backend's `/replay/<episodeId>` endpoint.
 *
 * @param episodeId - Identifier of the episode; it is percent-encoded before
 *   being placed in the URL path.
 * @returns The parsed JSON body, untyped.
 * @throws Error when the response status is not OK (the message includes the
 *   status code), or whatever `normalizeFetchError` produces for a failed
 *   request or an unreadable body.
 */
export async function getReplay(episodeId: string): Promise<unknown> {
  try {
    // Encode the id so characters such as '/', '?' or '#' cannot change which
    // path or query the request targets.
    const res = await fetch(`${BASE_URL}/replay/${encodeURIComponent(episodeId)}`);
    if (!res.ok) {
      throw new Error(`Failed to fetch replay: ${res.status}`);
    }
    // Await here rather than returning the promise: a rejection while reading
    // the body must land in the catch below so it is normalized too.
    return await res.json();
  } catch (error) {
    throw normalizeFetchError(error, 'Failed to fetch replay');
  }
}

// ---------------------------------------------------------------------------
// WebSocket support
// ---------------------------------------------------------------------------

/**
 * Messages the client can send over the WebSocket; `sendWsMessage` serializes
 * them as JSON. Variants are distinguished by `type`:
 * - `reset` carries `ResetParams`,
 * - `step` carries a `ScientistAction`,
 * - `state` carries no payload.
 */
export type WebSocketMessage =
  | { type: 'reset'; params: ResetParams }
  | { type: 'step'; action: ScientistAction }
  | { type: 'state' };

/**
 * Messages received over the WebSocket; `createWebSocket` parses incoming
 * frames as JSON and hands them to its `onMessage` callback typed as this
 * union. Variants are distinguished by `type`:
 * - `reset_ok` carries the new episode id and an observation,
 * - `step_ok` carries an observation (possibly null), the reward, the done
 *   flag and an untyped `info` record,
 * - `pong` carries no payload,
 * - `error` carries a message string.
 *
 * NOTE(review): `WebSocketMessage` has no `ping` variant, so nothing visible
 * here triggers `pong` — confirm against the server. The parsed value is cast,
 * not validated, so the shape is not checked at runtime.
 */
export type WebSocketResponse =
  | { type: 'reset_ok'; episode_id: string; observation: BackendObservation }
  | { type: 'step_ok'; observation: BackendObservation | null; reward: number; done: boolean; info: Record<string, unknown> }
  | { type: 'pong' }
  | { type: 'error'; message: string };

/**
 * Opens a WebSocket to `WS_URL` and wires the given callbacks to it.
 *
 * Incoming frames are parsed as JSON. A frame that fails to parse is logged
 * with `console.error` and dropped. Parsed messages are cast to
 * `WebSocketResponse` without validation.
 *
 * @param onMessage - Called with each successfully parsed message. Exceptions
 *   it throws are not caught here, so they surface as handler errors instead
 *   of being logged as parse failures.
 * @param onOpen - Called when the connection opens.
 * @param onClose - Called when the connection closes.
 * @param onError - Called with the error event.
 * @returns The created socket.
 */
export function createWebSocket(
  onMessage: (msg: WebSocketResponse) => void,
  onOpen?: () => void,
  onClose?: () => void,
  onError?: (err: Event) => void,
): WebSocket {
  const ws = new WebSocket(WS_URL);

  ws.onopen = () => onOpen?.();
  ws.onclose = () => onClose?.();
  ws.onerror = (e) => onError?.(e);
  ws.onmessage = (event) => {
    let msg: WebSocketResponse;
    // Keep only the parse inside the try: a bug in the consumer's handler
    // must not be reported as a malformed message.
    try {
      msg = JSON.parse(event.data) as WebSocketResponse;
    } catch {
      console.error('Failed to parse WebSocket message:', event.data);
      return;
    }
    onMessage(msg);
  };

  return ws;
}

/**
 * Sends a message over the socket as JSON, but only while the socket is open.
 * When the socket is in any other state the message is dropped without error.
 *
 * @param ws - Socket to send on.
 * @param msg - Message to serialize and send.
 */
export function sendWsMessage(ws: WebSocket, msg: WebSocketMessage) {
  const isOpen = ws.readyState === WebSocket.OPEN;
  if (!isOpen) {
    return;
  }
  ws.send(JSON.stringify(msg));
}

// ---------------------------------------------------------------------------
// Default scientist action (for auto-step)
// ---------------------------------------------------------------------------

/**
 * Builds a default scientist action for auto-stepping an episode.
 *
 * With a current protocol the action is a `revise_protocol` that reuses the
 * protocol's values; without one it is a `propose_protocol` built from the
 * paper summary, the lab constraints and per-template defaults.
 *
 * - Duration: the protocol's duration, else the paper's original duration
 *   (when non-zero), else the lab time limit; always clamped to
 *   [1, time limit], where the time limit defaults to 3 and is at least 1.
 * - Technique: the protocol's, else the paper's (unless it is 'N/A'), else a
 *   default chosen by template.
 * - Controls: the protocol's (when non-empty), else the paper's (when
 *   non-empty), else `['baseline']`.
 * - Sample size: after round 0 the protocol's size (default 3) is moved by +1
 *   on even rounds and -1 on odd rounds and clamped to [3, 12]; otherwise 4
 *   for 'math_reasoning' and 3 for everything else.
 * - Equipment / reagents: the protocol's (when non-empty), else the first
 *   item the lab has available, else none.
 *
 * @param state - Current episode state, if any.
 * @returns The action to submit.
 */
export function buildDefaultScientistAction(state?: EpisodeState): ScientistAction {
  const protocol = state?.protocol;
  const scenario = state?.template;
  const maxDays = Math.max(1, state?.lab_constraints.time_limit_days ?? 3);

  // --- duration ---------------------------------------------------------
  const paperDays = state?.paper.original_duration_days ?? 0;
  let wantedDays = protocol?.duration_days;
  if (wantedDays == null) {
    // A zero paper duration means "not provided"; use the lab limit instead.
    wantedDays = paperDays || maxDays;
  }
  const durationDays = Math.max(1, Math.min(maxDays, wantedDays));

  // --- technique --------------------------------------------------------
  let technique = protocol?.technique;
  if (technique == null) {
    const paperTechnique = state?.paper.original_technique;
    if (paperTechnique && paperTechnique !== 'N/A') {
      technique = paperTechnique;
    } else if (scenario === 'math_reasoning') {
      technique = 'structured_proof_check';
    } else if (scenario === 'finance_trading') {
      technique = 'offline_backtest';
    } else {
      technique = 'published_training_recipe';
    }
  }

  // --- controls ---------------------------------------------------------
  let controls: ScientistAction['controls'];
  if (protocol?.controls.length) {
    controls = protocol.controls;
  } else if (state?.paper.original_controls.length) {
    controls = state.paper.original_controls;
  } else {
    controls = ['baseline'];
  }

  // --- sample size ------------------------------------------------------
  const baseSampleSize = protocol?.sample_size ?? 3;
  const currentRound = state?.round;
  let sampleSize: number;
  if (currentRound && currentRound > 0) {
    const nudge = currentRound % 2 === 0 ? 1 : -1;
    sampleSize = Math.max(3, Math.min(baseSampleSize + nudge, 12));
  } else {
    sampleSize = scenario === 'math_reasoning' ? 4 : 3;
  }

  // --- resources --------------------------------------------------------
  let requiredEquipment: ScientistAction['required_equipment'];
  if (protocol?.required_equipment.length) {
    requiredEquipment = protocol.required_equipment;
  } else {
    requiredEquipment = state?.lab_constraints.equipment_available.slice(0, 1) ?? [];
  }

  let requiredReagents: ScientistAction['required_reagents'];
  if (protocol?.required_reagents.length) {
    requiredReagents = protocol.required_reagents;
  } else {
    requiredReagents = state?.lab_constraints.reagents_available.slice(0, 1) ?? [];
  }

  // --- rationale --------------------------------------------------------
  let rationale: string;
  if (protocol) {
    rationale = `Refine the existing protocol for round ${state?.round ?? 0} while staying inside the ${maxDays}-day lab window.`;
  } else {
    rationale = `Replicate the source result within the available lab window of ${maxDays} days using currently available resources.`;
  }

  return {
    action_type: protocol ? 'revise_protocol' : 'propose_protocol',
    sample_size: sampleSize,
    controls,
    technique,
    duration_days: durationDays,
    required_equipment: requiredEquipment,
    required_reagents: requiredReagents,
    questions: [],
    rationale,
  };
}

/**
 * Builds the action that accepts the current state of the negotiation.
 *
 * Only `action_type` carries meaning; every protocol field is filled with an
 * empty placeholder (0, '' or []).
 *
 * @returns A new `accept` action object on every call.
 */
export function buildAcceptAction(): ScientistAction {
  const blankProtocolFields = {
    sample_size: 0,
    controls: [],
    technique: '',
    duration_days: 0,
    required_equipment: [],
    required_reagents: [],
    questions: [],
    rationale: '',
  };
  return { action_type: 'accept', ...blankProtocolFields };
}