| import type { LlmTool } from "../llm/types"; |
| import { |
| recordInvocation, |
| captureOutgoingSeamSamples, |
| resolveNodeForDirectReference, |
| } from "../lib/tool-graph"; |
| import { logger } from "../lib/logger"; |
|
|
| |
| |
| |
| |
| |
| |
| |
| |
|
|
| export interface ToolDef { |
| schema: LlmTool; |
| invoke(args: Record<string, unknown>): Promise<unknown>; |
| } |
|
|
| const PUBMED_BASE = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils"; |
| const UNIPROT_BASE = "https://rest.uniprot.org"; |
| const OPENTARGETS_BASE = "https://api.platform.opentargets.org/api/v4/graphql"; |
|
|
| |
| |
| |
| |
| |
| |
| |
| |
| class FetchError extends Error { |
| status: number | null; |
| retryable: boolean; |
| attempts: number; |
| constructor( |
| message: string, |
| opts: { status: number | null; retryable: boolean; attempts: number }, |
| ) { |
| super(message); |
| this.name = "FetchError"; |
| this.status = opts.status; |
| this.retryable = opts.retryable; |
| this.attempts = opts.attempts; |
| } |
| } |
|
|
| const FETCH_TIMEOUT_MS = 15_000; |
| const FETCH_MAX_ATTEMPTS = 4; |
| const FETCH_BACKOFF_MS = [400, 1200, 3000]; |
|
|
| function isTransientStatus(status: number): boolean { |
| return status === 408 || status === 425 || status === 429 || status >= 500; |
| } |
|
|
| async function fetchJson(url: string, init?: RequestInit): Promise<unknown> { |
| let lastErr: unknown = null; |
| let lastStatus: number | null = null; |
| for (let attempt = 1; attempt <= FETCH_MAX_ATTEMPTS; attempt++) { |
| const ctl = new AbortController(); |
| const timer = setTimeout(() => ctl.abort(), FETCH_TIMEOUT_MS); |
| try { |
| const res = await fetch(url, { |
| ...init, |
| signal: init?.signal ?? ctl.signal, |
| headers: { |
| Accept: "application/json", |
| "User-Agent": "DoAtlas/1.0 (research workbench)", |
| ...(init?.headers || {}), |
| }, |
| }); |
| clearTimeout(timer); |
| if (res.ok) return res.json(); |
| lastStatus = res.status; |
| if (isTransientStatus(res.status) && attempt < FETCH_MAX_ATTEMPTS) { |
| const retryAfter = Number(res.headers.get("retry-after")); |
| const wait = Number.isFinite(retryAfter) && retryAfter > 0 |
| ? Math.min(10_000, retryAfter * 1000) |
| : FETCH_BACKOFF_MS[attempt - 1] ?? 3000; |
| await new Promise((r) => setTimeout(r, wait)); |
| lastErr = new Error(`HTTP ${res.status} from ${url}`); |
| continue; |
| } |
| throw new FetchError(`HTTP ${res.status} from ${url}`, { |
| status: res.status, |
| retryable: isTransientStatus(res.status), |
| attempts: attempt, |
| }); |
| } catch (err) { |
| clearTimeout(timer); |
| if (err instanceof FetchError) throw err; |
| lastErr = err; |
| const msg = err instanceof Error ? err.message : String(err); |
| const isAbort = msg.includes("aborted") || msg.includes("timeout"); |
| |
| |
| |
| if (attempt < FETCH_MAX_ATTEMPTS) { |
| const wait = FETCH_BACKOFF_MS[attempt - 1] ?? 3000; |
| await new Promise((r) => setTimeout(r, wait)); |
| continue; |
| } |
| throw new FetchError( |
| `network error after ${attempt} attempts: ${msg}`, |
| { |
| status: lastStatus, |
| retryable: true, |
| attempts: attempt, |
| }, |
| ); |
| } |
| } |
| throw new FetchError( |
| `exhausted retries: ${lastErr instanceof Error ? lastErr.message : String(lastErr)}`, |
| { status: lastStatus, retryable: true, attempts: FETCH_MAX_ATTEMPTS }, |
| ); |
| } |
|
|
| function fetchErrorPayload(err: unknown): { |
| error: string; |
| error_code: string; |
| retryable: boolean; |
| attempts: number; |
| status: number | null; |
| hint: string; |
| } { |
| if (err instanceof FetchError) { |
| const code = |
| err.status === 429 |
| ? "rate_limited" |
| : err.status && err.status >= 500 |
| ? "upstream_unavailable" |
| : err.status |
| ? `http_${err.status}` |
| : "network_error"; |
| const hint = |
| err.status === 429 |
| ? "Upstream rate limit hit even after backoff retries. Try a narrower or differently-worded query, or fall back to a different source tool." |
| : err.status && err.status >= 500 |
| ? "Upstream service is failing. Wait briefly or try an alternative tool." |
| : !err.status |
| ? "Network/timeout error. Retry with a smaller scope, or switch to an alternative tool." |
| : "Upstream rejected the request. Adjust the query and try again, or use a different tool."; |
| return { |
| error: err.message, |
| error_code: code, |
| retryable: err.retryable, |
| attempts: err.attempts, |
| status: err.status, |
| hint, |
| }; |
| } |
| const msg = err instanceof Error ? err.message : String(err); |
| return { |
| error: msg, |
| error_code: "unknown", |
| retryable: false, |
| attempts: 1, |
| status: null, |
| hint: "Inspect the error message; consider an alternative tool or query.", |
| }; |
| } |
|
|
| function safe<T extends Record<string, unknown>>(args: unknown): T { |
| return (args && typeof args === "object" ? args : {}) as T; |
| } |
|
|
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| async function runAtomicStep<T>( |
| nodeName: string, |
| argsSummary: Record<string, unknown> | null, |
| fn: () => Promise<T>, |
| ): Promise<T> { |
| const start = Date.now(); |
| |
| |
| |
| |
| |
| let resolved: Awaited<ReturnType<typeof resolveNodeForDirectReference>> = null; |
| try { |
| resolved = await resolveNodeForDirectReference(nodeName); |
| } catch (err) { |
| logger.debug( |
| { err, nodeName }, |
| "runAtomicStep deprecation pre-resolve failed", |
| ); |
| } |
| if (resolved && resolved.deprecated) { |
| logger.warn( |
| { |
| nodeName: resolved.nodeName, |
| replacedBy: resolved.replacedBy, |
| kind: "tool_dispatch_deprecated", |
| }, |
| "tool dispatch: deprecated node invoked by direct reference — refusing dispatch", |
| ); |
| const err = new Error( |
| `tool '${resolved.nodeName}' is deprecated; use '${resolved.replacedBy ?? "(no replacement set)"}' instead`, |
| ) as Error & { |
| code: string; |
| nodeName: string; |
| replacedBy: string | null; |
| }; |
| err.code = "TOOL_DEPRECATED"; |
| err.nodeName = resolved.nodeName; |
| err.replacedBy = resolved.replacedBy; |
| throw err; |
| } |
| try { |
| const result = await fn(); |
| void recordInvocation({ |
| nodeName, |
| ok: true, |
| argsSummary, |
| resultPreview: result, |
| durationMs: Date.now() - start, |
| }).catch((err) => |
| logger.debug({ err, nodeName }, "runAtomicStep ok-record failed"), |
| ); |
| |
| |
| |
| void captureOutgoingSeamSamples(nodeName, result).catch((err) => |
| logger.debug({ err, nodeName }, "runAtomicStep seam-capture failed"), |
| ); |
| return result; |
| } catch (err) { |
| void recordInvocation({ |
| nodeName, |
| ok: false, |
| argsSummary, |
| resultPreview: null, |
| errorCode: err instanceof Error ? err.name : "error", |
| durationMs: Date.now() - start, |
| }).catch((logErr) => |
| logger.debug({ err: logErr, nodeName }, "runAtomicStep err-record failed"), |
| ); |
| throw err; |
| } |
| } |
|
|
| const searchPubmed: ToolDef = { |
| schema: { |
| name: "search_pubmed", |
| description: |
| "在 NCBI PubMed 检索生物医学文献,返回前 N 条命中(含 PMID、标题、作者、期刊、年份、DOI、摘要片段)。仅检索英文文献。", |
| parameters: { |
| type: "object", |
| properties: { |
| query: { |
| type: "string", |
| description: |
| "PubMed 检索语句,支持 MeSH、字段限定、布尔运算,例如 'HER2 AND trastuzumab AND 2024[dp]'。", |
| }, |
| max_results: { |
| type: "integer", |
| minimum: 1, |
| maximum: 20, |
| default: 10, |
| }, |
| }, |
| required: ["query"], |
| }, |
| }, |
| async invoke(args) { |
| const a = safe<{ query: string; max_results?: number }>(args); |
| const q = a.query?.trim(); |
| if (!q) return { error: "query is required" }; |
| const max = Math.max(1, Math.min(20, a.max_results || 10)); |
| try { |
| const search = (await fetchJson( |
| `${PUBMED_BASE}/esearch.fcgi?db=pubmed&retmode=json&retmax=${max}&term=${encodeURIComponent(q)}`, |
| )) as { |
| esearchresult?: { idlist?: string[]; count?: string }; |
| }; |
| const ids = search.esearchresult?.idlist || []; |
| if (ids.length === 0) { |
| return { count: 0, hits: [], source: "pubmed" }; |
| } |
| const summary = (await fetchJson( |
| `${PUBMED_BASE}/esummary.fcgi?db=pubmed&retmode=json&id=${ids.join(",")}`, |
| )) as { result?: Record<string, unknown> }; |
| const result = summary.result || {}; |
| const hits = ids.map((pmid) => { |
| const it = (result[pmid] || {}) as { |
| title?: string; |
| source?: string; |
| pubdate?: string; |
| authors?: Array<{ name: string }>; |
| articleids?: Array<{ idtype: string; value: string }>; |
| }; |
| const doi = (it.articleids || []).find((a) => a.idtype === "doi")?.value; |
| return { |
| pmid, |
| title: it.title || "", |
| journal: it.source || "", |
| year: (it.pubdate || "").split(" ")[0] || "", |
| authors: (it.authors || []).slice(0, 6).map((a) => a.name), |
| doi, |
| url: `https://pubmed.ncbi.nlm.nih.gov/${pmid}/`, |
| }; |
| }); |
| return { count: Number(search.esearchresult?.count || ids.length), hits, source: "pubmed" }; |
| } catch (err) { |
| return fetchErrorPayload(err); |
| } |
| }, |
| }; |
|
|
| const lookupUniprot: ToolDef = { |
| schema: { |
| name: "lookup_uniprot", |
| description: |
| "在 UniProt 检索蛋白质条目,返回基础信息(accession、推荐名、基因名、物种、序列长度、功能描述)。", |
| parameters: { |
| type: "object", |
| properties: { |
| query: { |
| type: "string", |
| description: |
| "UniProt 检索语句,例如 'gene:BRCA1 AND organism_id:9606' 或 'P38398'。", |
| }, |
| max_results: { |
| type: "integer", |
| minimum: 1, |
| maximum: 10, |
| default: 5, |
| }, |
| }, |
| required: ["query"], |
| }, |
| }, |
| async invoke(args) { |
| const a = safe<{ query: string; max_results?: number }>(args); |
| const q = a.query?.trim(); |
| if (!q) return { error: "query is required" }; |
| const size = Math.max(1, Math.min(10, a.max_results || 5)); |
| try { |
| const data = (await fetchJson( |
| `${UNIPROT_BASE}/uniprotkb/search?query=${encodeURIComponent(q)}&format=json&size=${size}&fields=accession,id,protein_name,gene_names,organism_name,length,cc_function`, |
| )) as { |
| results?: Array<{ |
| primaryAccession?: string; |
| uniProtkbId?: string; |
| proteinDescription?: { |
| recommendedName?: { fullName?: { value?: string } }; |
| }; |
| genes?: Array<{ geneName?: { value?: string } }>; |
| organism?: { scientificName?: string }; |
| sequence?: { length?: number }; |
| comments?: Array<{ |
| commentType?: string; |
| texts?: Array<{ value?: string }>; |
| }>; |
| }>; |
| }; |
| const hits = (data.results || []).map((it) => ({ |
| accession: it.primaryAccession, |
| entry_name: it.uniProtkbId, |
| recommended_name: |
| it.proteinDescription?.recommendedName?.fullName?.value, |
| gene: (it.genes || []).map((g) => g.geneName?.value).filter(Boolean), |
| organism: it.organism?.scientificName, |
| length: it.sequence?.length, |
| function: (it.comments || []) |
| .filter((c) => c.commentType === "FUNCTION") |
| .flatMap((c) => (c.texts || []).map((t) => t.value)) |
| .filter(Boolean) |
| .join(" ") |
| .slice(0, 800), |
| url: it.primaryAccession |
| ? `https://www.uniprot.org/uniprotkb/${it.primaryAccession}` |
| : undefined, |
| })); |
| return { count: hits.length, hits, source: "uniprot" }; |
| } catch (err) { |
| return fetchErrorPayload(err); |
| } |
| }, |
| }; |
|
|
| const queryOpenTargets: ToolDef = { |
| schema: { |
| name: "query_opentargets", |
| description: |
| "在 Open Targets Platform 查询「靶点 ↔ 疾病」证据:给定基因符号,返回与该靶点关联最强的疾病列表(含整体打分、数据来源数)。", |
| parameters: { |
| type: "object", |
| properties: { |
| gene_symbol: { |
| type: "string", |
| description: "HGNC 基因符号,例如 'BRCA1'、'EGFR'。", |
| }, |
| max_results: { |
| type: "integer", |
| minimum: 1, |
| maximum: 25, |
| default: 10, |
| }, |
| }, |
| required: ["gene_symbol"], |
| }, |
| }, |
| async invoke(args) { |
| const a = safe<{ gene_symbol: string; max_results?: number }>(args); |
| const g = a.gene_symbol?.trim(); |
| if (!g) return { error: "gene_symbol is required" }; |
| const size = Math.max(1, Math.min(25, a.max_results || 10)); |
|
|
| try { |
| |
| const target = await runAtomicStep( |
| "opentargets_find_target_id", |
| { gene_symbol: g }, |
| () => opentargetsFindTargetId(g), |
| ); |
| if (!target) return { error: `No target found for gene ${g}` }; |
|
|
| |
| const assoc = await runAtomicStep( |
| "opentargets_get_target_associations", |
| { ensembl_id: target.id, size }, |
| () => opentargetsGetAssociations(target.id, size), |
| ); |
| const t = assoc.data?.target; |
| if (!t) return { error: "Failed to fetch associations" }; |
| return { |
| target: { |
| ensembl_id: t.id, |
| symbol: t.approvedSymbol, |
| name: t.approvedName, |
| }, |
| count: t.associatedDiseases?.count || 0, |
| diseases: (t.associatedDiseases?.rows || []).map((r) => ({ |
| id: r.disease.id, |
| name: r.disease.name, |
| score: r.score, |
| datasources: r.datasourceScores?.length ?? 0, |
| url: `https://platform.opentargets.org/disease/${r.disease.id}`, |
| })), |
| source: "opentargets", |
| }; |
| } catch (err) { |
| return fetchErrorPayload(err); |
| } |
| }, |
| }; |
|
|
| |
| |
| |
| |
| |
|
|
| const OT_FIND_QUERY = `query($q: String!) { search(queryString: $q, entityNames: ["target"]) { hits { id name entity } } }`; |
| const OT_ASSOC_QUERY = `query($id: String!, $size: Int!) { |
| target(ensemblId: $id) { |
| id |
| approvedSymbol |
| approvedName |
| associatedDiseases(page: { index: 0, size: $size }) { |
| count |
| rows { |
| score |
| datasourceScores { id score } |
| disease { id name therapeuticAreas { id name } } |
| } |
| } |
| } |
| }`; |
|
|
| async function opentargetsFindTargetId( |
| gene_symbol: string, |
| ): Promise<{ id: string; name: string } | null> { |
| const lookup = (await fetchJson(OPENTARGETS_BASE, { |
| method: "POST", |
| headers: { "Content-Type": "application/json" }, |
| body: JSON.stringify({ |
| query: OT_FIND_QUERY, |
| variables: { q: gene_symbol }, |
| }), |
| })) as { |
| data?: { search?: { hits?: Array<{ id: string; name: string }> } }; |
| }; |
| return lookup.data?.search?.hits?.[0] ?? null; |
| } |
|
|
| interface OpentargetsAssociationsResponse { |
| data?: { |
| target?: { |
| id: string; |
| approvedSymbol: string; |
| approvedName: string; |
| associatedDiseases?: { |
| count: number; |
| rows: Array<{ |
| score: number; |
| datasourceScores?: Array<{ id: string; score: number }>; |
| disease: { id: string; name: string }; |
| }>; |
| }; |
| }; |
| }; |
| } |
|
|
| async function opentargetsGetAssociations( |
| ensembl_id: string, |
| size: number, |
| ): Promise<OpentargetsAssociationsResponse> { |
| return (await fetchJson(OPENTARGETS_BASE, { |
| method: "POST", |
| headers: { "Content-Type": "application/json" }, |
| body: JSON.stringify({ |
| query: OT_ASSOC_QUERY, |
| variables: { id: ensembl_id, size }, |
| }), |
| })) as OpentargetsAssociationsResponse; |
| } |
|
|
| |
| |
| |
| |
| |
| |
| const summarizeLiteratureBucket: ToolDef = { |
| schema: { |
| name: "summarize_literature_bucket", |
| description: |
| "Get the meta-analytic pooled literature estimate for a single bucket (θ̂, CI, method, I², supporting paper/edge counts). Use this to cite the pooled effect across the literature for a (X, Y, population, measure, time) bucket.", |
| parameters: { |
| type: "object", |
| properties: { |
| bucket_key: { |
| type: "string", |
| description: |
| "Bucket id from PostgresCausalNetwork (e.g. 'B:...') or composite bucket_key from the legacy JSONL store.", |
| }, |
| }, |
| required: ["bucket_key"], |
| }, |
| }, |
| async invoke(args) { |
| const bucket_key = String(args?.bucket_key || "").trim(); |
| if (!bucket_key) return { error: "bucket_key is required" }; |
| const { causalGetBucket } = await import("../lib/causal-store"); |
| const { getBucketByKey, ensureLoaded } = await import( |
| "../lib/research-data" |
| ); |
| try { |
| await ensureLoaded(); |
| } catch { |
| |
| |
| } |
| |
| |
| |
| |
| const pg = await runAtomicStep( |
| "aggregate_bucket_summary", |
| { bucket_key }, |
| () => causalGetBucket(bucket_key), |
| ); |
| if (pg) { |
| return { |
| bucket_id: pg.bucket_id, |
| bucket_key: pg.bucket_key, |
| supporting_edges: pg.supporting_edges, |
| paper_count: pg.paper_ids.length, |
| status: pg.status, |
| pooled_estimate: pg.pooled_estimate, |
| }; |
| } |
| |
| |
| |
| |
| const local = await runAtomicStep( |
| "aggregate_bucket_summary", |
| { bucket_key, source: "local_jsonl" }, |
| async () => getBucketByKey(bucket_key), |
| ); |
| if (local) { |
| return { |
| bucket_id: local.bucket_key, |
| bucket_key: { |
| kappa_pi: local.population_bucket, |
| X: local.x_norm, |
| Y: local.y_norm, |
| tau_hat: local.time_index, |
| ctype: local.equation_type, |
| kappa_mu: `${local.measure_type}:${local.measure_scale}`, |
| }, |
| supporting_edges: local.edge_count, |
| paper_count: local.paper_count, |
| status: "review_required", |
| pooled_estimate: null, |
| note: "No pooled estimate available — bucket has not been ingested into the persisted causal network yet.", |
| }; |
| } |
| return { error: "Bucket not found", bucket_key }; |
| }, |
| }; |
|
|
| |
| export const PUBLIC_API_TOOLS: ToolDef[] = [ |
| searchPubmed, |
| lookupUniprot, |
| queryOpenTargets, |
| summarizeLiteratureBucket, |
| ]; |
|
|
| |
| |
| |
| import { RESEARCH_ENGINE_TOOLS } from "./research-engine-tools"; |
| import { ATOMIC_TOOLS } from "./atomic-tools"; |
| import { performWebSearch } from "./web-search"; |
|
|
| |
| |
| |
| |
| |
| |
| const webSearchInvoker = async ( |
| args: Record<string, unknown>, |
| ): Promise<unknown> => { |
| |
| |
| const query = |
| typeof args["query"] === "string" |
| ? (args["query"] as string) |
| : typeof args["query_key"] === "string" |
| ? (args["query_key"] as string) |
| : Array.isArray(args["query_tag_list"]) |
| ? (args["query_tag_list"] as unknown[]).map(String).join(" ") |
| : ""; |
| const max = typeof args["max_results"] === "number" |
| ? (args["max_results"] as number) |
| : 6; |
| return performWebSearch({ query, max_results: max }); |
| }; |
|
|
| const webSearch: ToolDef = { |
| schema: { |
| name: "web_search", |
| description: |
| "Search the live web and return a list of {title, url, snippet, source} results. Use for current events, recent papers, drug news, regulatory filings, or anything where the in-house biomedical APIs do not have an answer.", |
| parameters: { |
| type: "object", |
| properties: { |
| query: { type: "string", description: "Search query in natural language." }, |
| max_results: { |
| type: "integer", |
| minimum: 1, |
| maximum: 10, |
| description: "Number of results to return; default 6.", |
| }, |
| }, |
| required: ["query"], |
| additionalProperties: false, |
| }, |
| }, |
| invoke: webSearchInvoker, |
| }; |
|
|
| const pluginWebSearch: ToolDef = { |
| schema: { |
| name: "plugin_web_search", |
| description: |
| "MiniMax-compatible alias of `web_search`. Same semantics; provided so MiniMax M2 can call its expected tool name without a separate handler.", |
| parameters: { |
| type: "object", |
| properties: { |
| query_key: { type: "string", description: "Search query string." }, |
| query_tag_list: { |
| type: "array", |
| items: { type: "string" }, |
| description: "Optional list of keywords; joined into one query.", |
| }, |
| max_results: { type: "integer", minimum: 1, maximum: 10 }, |
| }, |
| additionalProperties: true, |
| }, |
| }, |
| invoke: webSearchInvoker, |
| }; |
|
|
| export const TOOLS: ToolDef[] = [ |
| ...PUBLIC_API_TOOLS, |
| ...ATOMIC_TOOLS, |
| ...RESEARCH_ENGINE_TOOLS, |
| webSearch, |
| pluginWebSearch, |
| ]; |
|
|
| export const TOOL_SCHEMAS: LlmTool[] = TOOLS.map((t) => t.schema); |
|
|
| export function findTool(name: string): ToolDef | undefined { |
| return TOOLS.find((t) => t.schema.name === name); |
| } |
|
|