| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| import { test } from "node:test"; |
| import assert from "node:assert/strict"; |
|
|
| |
| |
| |
// Provide a placeholder connection string unless one is already set.
// NOTE(review): presumably "./agent-supervisor" (or something it pulls in)
// reads DATABASE_URL while loading — confirm. That would explain why the
// module is loaded with a dynamic import further down instead of a static one.
if (process.env.DATABASE_URL == null) {
  process.env.DATABASE_URL = "postgres://test:test@127.0.0.1:5432/test";
}

// Type-only import: it is erased on compile and never loads the module.
// The alias avoids a clash with the runtime `AgentRunState` binding below.
import type { AgentRunState as AgentRunStateType } from "./agent-supervisor";

// Load the module under test only after the environment default is in place.
const supervisorModule = await import("./agent-supervisor");
const AgentRunState = supervisorModule.AgentRunState;
const CIRCUIT_BREAKER_THRESHOLD = supervisorModule.CIRCUIT_BREAKER_THRESHOLD;
const formatCarryOverPrompt = supervisorModule.formatCarryOverPrompt;
|
|
/**
 * Creates a fresh `AgentRunState` with fixed test identifiers and an
 * in-memory event sink.
 *
 * @returns `state` — the new run state; `events` — the array that receives
 *   every event passed to the state's `emit` callback.
 */
function makeState() {
  type EmittedEvent = { event: string; data: Record<string, unknown> };
  const sink: EmittedEvent[] = [];

  const state = new AgentRunState({
    conversationId: "cnv_test",
    userId: "usr_test",
    messageId: "msg_test",
    emit: (emitted) => sink.push(emitted),
  });

  return { state, events: sink };
}
|
|
/**
 * Feeds `state` a single-step `<plan>` block followed by a `<step_update>`
 * that marks the same step as "running".
 *
 * @param state - run state that receives the two text deltas.
 * @param stepId - id used for the plan step and for the step update.
 * @param opts - optional step fields; `goal` falls back to `Goal for <stepId>`.
 *   An undefined `success_criteria` is dropped by `JSON.stringify`.
 */
function feedPlanWithStep(state: AgentRunStateType, stepId: string, opts: {
  goal?: string;
  success_criteria?: string;
} = {}) {
  const step = {
    id: stepId,
    goal: opts.goal ?? `Goal for ${stepId}`,
    success_criteria: opts.success_criteria,
  };
  const planJson = JSON.stringify({ steps: [step] });
  const runningJson = JSON.stringify({ id: stepId, status: "running" });

  state.ingestTextDelta(`<plan>${planJson}</plan>\n`);
  state.ingestTextDelta(`<step_update>${runningJson}</step_update>\n`);
}
|
|
| |
|
|
test("circuit breaker trips after THRESHOLD transient failures and stays open", () => {
  const { state: run } = makeState();
  run.noteIteration();

  const tool = "search_pubmed";

  // Records one failed `search_pubmed` call carrying the `rate_limited` code.
  const failOnce = (toolCallId: string) =>
    run.recordToolOutcome({
      toolCallId,
      toolName: tool,
      args: { query: "x" },
      result: { error: "rate limited", error_code: "rate_limited" },
      isError: true,
      durationMs: 10,
    });

  // Nothing recorded yet, so the check reports no block.
  assert.equal(run.circuitCheck(tool, { query: "x" }), null);

  // One failure short of the threshold: the check must still return null.
  const failuresBeforeTrip = CIRCUIT_BREAKER_THRESHOLD - 1;
  for (let n = 0; n < failuresBeforeTrip; n += 1) {
    failOnce(`c_${n}`);
  }
  assert.equal(run.circuitCheck(tool, { query: "x" }), null);

  // The failure that reaches the threshold.
  failOnce("c_final");

  // The breaker now answers for the tool even when the query differs.
  const verdict = run.circuitCheck(tool, { query: "anything else" });
  assert.ok(verdict, "circuit should be open");
  assert.equal(verdict!.error_code, "circuit_open");
  assert.equal(verdict!.retryable, false);
  assert.match(verdict!.suggestion, /opentargets|uniprot|alternative/i);

  // Args that name a `source` are still let through (null).
  // NOTE(review): presumably the breaker keys on the source/route rather than
  // the tool name alone — confirm against circuitCheck.
  assert.equal(run.circuitCheck(tool, { source: "europepmc" }), null);
});
|
|
test("non-transient errors do not contribute to the circuit breaker", () => {
  const { state: run } = makeState();

  // Record more `invalid_argument` failures than the breaker threshold.
  const attempts = CIRCUIT_BREAKER_THRESHOLD + 2;
  let attempt = 0;
  while (attempt < attempts) {
    run.recordToolOutcome({
      toolCallId: `c_${attempt}`,
      toolName: "lookup_uniprot",
      args: { id: "P12345" },
      result: { error: "bad request", error_code: "invalid_argument" },
      isError: true,
      durationMs: 5,
    });
    attempt += 1;
  }

  // The check must still return null for the same tool and args.
  const verdict = run.circuitCheck("lookup_uniprot", { id: "P12345" });
  assert.equal(verdict, null);
});
|
|
| |
|
|
test("per-turn cache returns the same result for an identical (tool, args) call", () => {
  const { state: run } = makeState();
  const tool = "search_pubmed";
  const brcaArgs = { query: "BRCA1", limit: 5 };
  const brcaResult = { hits: [{ pmid: "12345", title: "demo" }] };
  // A new args object per use, so no lookup depends on object identity.
  const tp53Args = () => ({ query: "TP53" });

  // Before any outcome is recorded, the lookup misses.
  assert.equal(run.cacheLookup(tool, brcaArgs), undefined);

  run.recordToolOutcome({
    toolCallId: "c1",
    toolName: tool,
    args: brcaArgs,
    result: brcaResult,
    isError: false,
    durationMs: 42,
  });

  // Same args object: the very same result object comes back.
  const hitSameObject = run.cacheLookup(tool, brcaArgs);
  assert.strictEqual(hitSameObject, brcaResult);

  // A structurally equal but distinct args object hits as well.
  const hitEqualArgs = run.cacheLookup(tool, { query: "BRCA1", limit: 5 });
  assert.strictEqual(hitEqualArgs, brcaResult);

  // Different args miss.
  assert.equal(run.cacheLookup(tool, tp53Args()), undefined);

  // An outcome recorded with `isError: true` must not become a cache hit.
  run.recordToolOutcome({
    toolCallId: "c2",
    toolName: tool,
    args: tp53Args(),
    result: { error: "boom", error_code: "internal" },
    isError: true,
    durationMs: 1,
  });
  assert.equal(run.cacheLookup(tool, tp53Args()), undefined);
});
|
|
| |
|
|
test("validator downgrades a 'done' step that has no successful tool route", () => {
  const { state: run } = makeState();
  run.noteIteration();
  feedPlanWithStep(run, "s1", { goal: "Find something" });

  // Mark the step "done" without recording any tool outcome first.
  const doneUpdate = JSON.stringify({ id: "s1", status: "done" });
  run.ingestTextDelta(`<step_update>${doneUpdate}</step_update>\n`);

  // The validator must fail and report exactly this step as downgraded.
  const verdict = run.runValidator();
  assert.equal(verdict.passed, false);
  assert.equal(verdict.downgraded.length, 1);
  const demoted = verdict.downgraded[0]!;
  assert.equal(demoted.id, "s1");
  assert.equal(demoted.reason, "no_evidence");

  // The public view of the run shows the step as failed, with an explanatory note.
  const tokenUsage = { input: 0, output: 0 };
  const snapshot = run.toPublic("complete", new Date(), tokenUsage);
  const publicStep = snapshot.steps.find((candidate) => candidate.id === "s1")!;
  assert.equal(publicStep.status, "failed");
  assert.match(publicStep.note ?? "", /downgraded.*no_evidence/);

  // After a downgrade, the state asks for a recovery round.
  assert.equal(run.needsRecoveryRound(), true);
});
|
|
test("validator passes when a 'done' step has a source-linked tool route", () => {
  const { state: run } = makeState();
  run.noteIteration();
  feedPlanWithStep(run, "s1", { goal: "Look up paper" });

  // Record a successful tool call while the step is running.
  // NOTE(review): the `url` in the hit is presumably what makes this route
  // "source-linked" — confirm against the validator.
  const successfulLookup = {
    toolCallId: "c1",
    toolName: "search_pubmed",
    args: { query: "BRCA1" },
    result: { hits: [{ pmid: "12345", url: "https://pubmed/12345" }] },
    isError: false,
    durationMs: 5,
  };
  run.recordToolOutcome(successfulLookup);

  const doneUpdate = JSON.stringify({ id: "s1", status: "done" });
  run.ingestTextDelta(`<step_update>${doneUpdate}</step_update>\n`);

  // Nothing is downgraded and no recovery round is requested.
  const verdict = run.runValidator();
  assert.equal(verdict.passed, true);
  assert.equal(verdict.downgraded.length, 0);
  assert.equal(run.needsRecoveryRound(), false);
});
|
|
| |
|
|
test("cross-turn working memory carries an unresolved step into the next turn's system prompt", () => {
  // Wraps a payload in the `<step_update>` tag format used throughout this test.
  const stepUpdate = (payload: Record<string, unknown>) =>
    `<step_update>${JSON.stringify(payload)}</step_update>\n`;

  // --- Run with a failed step -------------------------------------------
  const { state: failedRun } = makeState();
  failedRun.noteIteration();
  feedPlanWithStep(failedRun, "s1", { goal: "Investigate target X" });
  failedRun.ingestTextDelta(
    stepUpdate({ id: "s1", status: "failed", note: "rate limited" }),
  );

  // The failed step appears in the pending working-memory record.
  const memory = failedRun.buildPendingWorkingMemory();
  assert.ok(memory, "expected a memory record for a failed step");
  assert.equal(memory!.unresolved_steps.length, 1);
  const carried = memory!.unresolved_steps[0]!;
  assert.equal(carried.id, "s1");
  assert.equal(carried.goal, "Investigate target X");
  assert.equal(carried.note, "rate limited");
  assert.equal(carried.source_run_id, failedRun.id);

  // The record renders to a carry-over prompt that lists the step.
  const carryOver = formatCarryOverPrompt(memory);
  assert.match(carryOver, /Carry-over from the previous turn/);
  assert.match(carryOver, /- s1: Investigate target X \(rate limited\)/);

  // --- Run whose only step finishes with a successful tool call ----------
  const { state: cleanRun } = makeState();
  cleanRun.noteIteration();
  feedPlanWithStep(cleanRun, "s1");
  cleanRun.recordToolOutcome({
    toolCallId: "c1",
    toolName: "search_pubmed",
    args: { query: "x" },
    result: { hits: [{ pmid: "1", url: "https://e/1" }] },
    isError: false,
    durationMs: 1,
  });
  cleanRun.ingestTextDelta(stepUpdate({ id: "s1", status: "done" }));

  // Nothing to carry over; a null record formats to the empty string.
  assert.equal(cleanRun.buildPendingWorkingMemory(), null);
  assert.equal(formatCarryOverPrompt(null), "");
});
|
|
test("reflection-only unresolved steps are also carried forward", () => {
  const { state: run } = makeState();
  run.noteIteration();
  feedPlanWithStep(run, "s2", { goal: "Cross-reference disease" });

  // The step is never marked failed; it is only listed as unresolved inside
  // a `<reflection>` block.
  const reflection = {
    step_states: [{ id: "s2", status: "pending" }],
    unresolved: [{ id: "s2", reason: "no time" }],
  };
  run.ingestTextDelta(`<reflection>${JSON.stringify(reflection)}</reflection>\n`);

  // The step must still appear in the pending working-memory record.
  const memory = run.buildPendingWorkingMemory();
  assert.ok(memory);
  const unresolved = memory!.unresolved_steps;
  assert.equal(unresolved.length, 1);
  assert.equal(unresolved[0]!.id, "s2");
});
|
|
| |
|
|
test("agent tags are stripped from user-visible deltas", () => {
  const { state: run } = makeState();

  // Ordinary text on both sides of a `<plan>` block, in a single delta.
  const planJson = JSON.stringify({ steps: [{ id: "s1", goal: "g" }] });
  const shown = run.ingestTextDelta(`Hello!\n<plan>${planJson}</plan>\nWorld\n`);

  // The return value of ingestTextDelta is treated as the user-visible text:
  // the surrounding prose survives...
  for (const kept of [/Hello!/, /World/]) {
    assert.match(shown, kept);
  }
  // ...while neither plan tag appears in it.
  for (const hidden of [/<plan>/, /<\/plan>/]) {
    assert.doesNotMatch(shown, hidden);
  }
});
|
|