// ─── Runtime ────────────────────────────────────────────────────────────────
// The heart of the harness: manages session state, orchestration, tool execution,
// budgets, retries, and structured planner/executor/evaluator roles.

import { nanoid } from 'nanoid';
import { EventBus, now, type HarnessEvent, type PlanItem, type TokenUsage, type ToolCallRecord } from '../events/index.js';
import type { ProviderAdapter, ModelInvocation, Message, ToolCallRequest, ModelResult } from '../provider/index.js';
import { ToolRegistry, type ToolDef, type ToolContext } from '../tools/index.js';
import { SkillRegistry } from '../skills/index.js';
import { PolicyEngine } from '../policy/index.js';
import { MetricsCollector } from '../observability/index.js';
import { ArtifactStore } from '../artifacts/index.js';
import { Evaluator, type EvalContext } from '../evaluators/index.js';

// ─── Session State ──────────────────────────────────────────────────────────

/** Mutable per-session record owned by a {@link Runtime} and returned from `run()`. */
export interface SessionState {
  /** Session identifier, generated with `nanoid()` when the runtime is constructed. */
  id: string;
  /** The goal string; sent to the model as the first user message. */
  goal: string;
  plan: PlanItem[];
  /** Conversation transcript: system prompt, goal, assistant turns and tool results. */
  messages: Message[];
  artifacts: string[]; // artifact IDs
  /** Copied from `config.provider.id` at construction. */
  provider: string;
  model: string;
  /** Snapshot of `config.activeSkills` taken at construction. */
  skills: string[];
  /** Running totals accumulated from each model result's `usage`. */
  budgetUsed: { tokens: number; costUsd: number };
  status: 'running' | 'completed' | 'failed' | 'paused';
  /** Number of failed model invocations retried so far (session-wide, never reset). */
  retries: number;
  maxRetries: number;
  /** Creation timestamp as produced by `now()`. */
  createdAt: string;
}

/** Collaborators and limits handed to a {@link Runtime}. */
export interface RuntimeConfig {
  provider: ProviderAdapter;
  model: string;
  tools: ToolRegistry;
  skills: SkillRegistry;
  policy: PolicyEngine;
  metrics: MetricsCollector;
  artifacts: ArtifactStore;
  evaluator: Evaluator;
  eventBus: EventBus;
  systemPrompt: string;
  activeSkills: string[];
  /** Maximum number of failed model calls to retry. Defaults to 3. */
  maxRetries?: number;
  /** Maximum number of loop iterations in `run()`. Defaults to 20. */
  maxTurns?: number;
  /** Only reported as the `limit` of the `budget.warning` event; enforcement is done by `policy.checkBudget`. */
  budgetTokens?: number;
  budgetCostUsd?: number;
  /** Asked to approve a tool call when policy requires it; resolve `false` to deny. */
  approvalHandler?: (toolCall: ToolCallRecord) => Promise<boolean>;
}

// ─── Runtime ────────────────────────────────────────────────────────────────

/**
 * Drives one session: repeatedly invokes the model, executes the tool calls it
 * requests, and stops when the model answers without tool calls, a limit is
 * hit, or the session is cancelled/paused.
 */
export class Runtime {
  private readonly state: SessionState;
  private readonly config: RuntimeConfig;
  private readonly abortController = new AbortController();
  private turn = 0;

  /**
   * @param config Collaborators and limits for the session.
   * @param goal The user goal; becomes the first user message in `run()`.
   */
  constructor(config: RuntimeConfig, goal: string) {
    this.config = config;
    this.state = {
      id: nanoid(),
      goal,
      plan: [],
      messages: [],
      artifacts: [],
      provider: config.provider.id,
      model: config.model,
      // Copy so later mutation of the caller's array cannot change session state.
      skills: [...config.activeSkills],
      budgetUsed: { tokens: 0, costUsd: 0 },
      status: 'running',
      retries: 0,
      maxRetries: config.maxRetries ?? 3,
      createdAt: now(),
    };
  }

  get sessionId(): string {
    return this.state.id;
  }

  get status(): string {
    return this.state.status;
  }

  /** Forwards an event to the configured event bus. */
  private emit(event: HarnessEvent): void {
    this.config.eventBus.emit(event);
  }

  /**
   * Runs the model/tool loop until the session leaves the `running` state or
   * the turn limit is reached.
   *
   * Terminal events: `session.completed` is emitted only when the model
   * finished without requesting tools (after the evaluator has run);
   * `session.failed` is emitted for model failure after retries, cancellation,
   * budget exhaustion, turn-limit exhaustion, or any thrown error. A paused
   * session returns without a terminal event.
   *
   * @returns The session state (the same object the runtime keeps mutating).
   */
  async run(): Promise<SessionState> {
    this.emit({ type: 'session.started', sessionId: this.state.id, goal: this.state.goal, timestamp: now() });

    // Build system message
    const skillInstructions = this.config.skills.buildInstructions(this.config.activeSkills);
    const systemMsg: Message = {
      role: 'system',
      content: [this.config.systemPrompt, skillInstructions].filter(Boolean).join('\n\n---\n\n'),
    };
    this.state.messages = [systemMsg, { role: 'user', content: this.state.goal }];

    const maxTurns = this.config.maxTurns ?? 20;
    // Why the session ended unsuccessfully, when the loop itself knows.
    let failureReason: string | undefined;

    try {
      while (this.state.status === 'running' && this.turn < maxTurns) {
        this.turn++;

        // Budget check
        const budgetCheck = this.config.policy.checkBudget(this.state.budgetUsed);
        if (!budgetCheck.ok) {
          this.emit({
            type: 'budget.warning',
            usage: {
              promptTokens: 0,
              completionTokens: 0,
              totalTokens: this.state.budgetUsed.tokens,
              estimatedCostUsd: this.state.budgetUsed.costUsd,
            },
            limit: this.config.budgetTokens ?? 0,
            timestamp: now(),
          });
          failureReason = 'Budget exhausted.';
          break;
        }

        // Invoke model
        const toolDefs = this.config.tools.listForModel();
        const invocation: ModelInvocation = {
          model: this.config.model,
          messages: this.state.messages,
          tools: toolDefs,
          signal: this.abortController.signal,
        };

        this.emit({ type: 'model.request.start', provider: this.config.provider.id, model: this.config.model, timestamp: now() });
        const startMs = Date.now();

        let result: ModelResult;
        try {
          result = await this.config.provider.invoke(invocation);
        } catch (err) {
          const errMsg = err instanceof Error ? err.message : String(err);
          this.emit({ type: 'error', message: `Model call failed: ${errMsg}`, timestamp: now() });
          if (this.state.retries < this.state.maxRetries) {
            // Note: a retried call still consumes a turn.
            this.state.retries++;
            continue;
          }
          this.state.status = 'failed';
          failureReason = `Model call failed: ${errMsg}`;
          break;
        }

        const durationMs = Date.now() - startMs;
        this.state.budgetUsed.tokens += result.usage.totalTokens;
        this.state.budgetUsed.costUsd += result.usage.estimatedCostUsd ?? 0;

        this.emit({
          type: 'model.request.end',
          provider: this.config.provider.id,
          model: this.config.model,
          usage: result.usage,
          durationMs,
          timestamp: now(),
        });
        this.config.metrics.record({
          timestamp: now(),
          type: 'model-call',
          provider: this.config.provider.id,
          model: this.config.model,
          durationMs,
          usage: result.usage,
          success: true,
        });

        // Handle assistant response
        if (result.content) {
          this.emit({ type: 'model.stream.end', fullText: result.content, timestamp: now() });
        }

        this.state.messages.push({
          role: 'assistant',
          content: result.content,
          toolCalls: result.toolCalls,
        });

        // If no tool calls, we're done
        if (!result.toolCalls || result.toolCalls.length === 0) {
          this.state.status = 'completed';
          break;
        }

        // Execute tool calls
        for (const tc of result.toolCalls) {
          // Do not start further tools once the session has been cancelled.
          if (this.abortController.signal.aborted) break;

          const toolResult = await this.executeTool(tc);
          this.state.messages.push({
            role: 'tool',
            content: typeof toolResult === 'string' ? toolResult : JSON.stringify(toolResult),
            toolCallId: tc.id,
          });
        }
      }

      // Still 'running' here means the loop stopped on the budget check or the
      // turn limit; the run is over, so do not report it as running.
      if (this.state.status === 'running') {
        this.state.status = 'failed';
        failureReason = failureReason ?? `Turn limit of ${maxTurns} reached before the goal was completed.`;
      }

      if (this.state.status === 'failed') {
        const reason = this.abortController.signal.aborted
          ? 'Session cancelled.'
          : (failureReason ?? 'Session failed.');
        this.emit({ type: 'session.failed', sessionId: this.state.id, error: reason, timestamp: now() });
      } else if (this.state.status === 'completed') {
        // Run evaluation
        this.emit({ type: 'evaluation.started', timestamp: now() });
        const evalCtx: EvalContext = {
          sessionId: this.state.id,
          goal: this.state.goal,
          artifacts: this.config.artifacts.list().map((a) => ({ path: a.path, content: a.content, type: a.type })),
          assistantOutput: this.state.messages
            .filter((m) => m.role === 'assistant')
            .map((m) => m.content)
            .join('\n'),
          workDir: process.cwd(),
        };
        const report = await this.config.evaluator.evaluate(evalCtx);
        this.emit({ type: 'evaluation.completed', report, timestamp: now() });

        this.emit({
          type: 'session.completed',
          sessionId: this.state.id,
          summary: `Completed in ${this.turn} turns.`,
          timestamp: now(),
        });
      }
      // 'paused': return without a terminal event; the session neither
      // completed nor failed.
    } catch (err) {
      const errMsg = err instanceof Error ? err.message : String(err);
      this.state.status = 'failed';
      this.emit({ type: 'session.failed', sessionId: this.state.id, error: errMsg, timestamp: now() });
    }

    return this.state;
  }

  /**
   * Rejects with `Error('Tool timeout')` if `work` has not settled within
   * `timeoutMs`. The timer is always cleared so a finished tool call does not
   * leave a pending timer behind.
   */
  private async withTimeout<T>(work: Promise<T>, timeoutMs: number): Promise<T> {
    let timer: ReturnType<typeof setTimeout> | undefined;
    const timeout = new Promise<never>((_, reject) => {
      timer = setTimeout(() => reject(new Error('Tool timeout')), timeoutMs);
    });
    try {
      return await Promise.race([work, timeout]);
    } finally {
      clearTimeout(timer);
    }
  }

  /**
   * Resolves, validates, authorises and executes a single tool call.
   *
   * Failures (unknown tool, invalid input, policy block, user denial, tool
   * error after retries) are reported via events and returned as
   * `{ error: string }` rather than thrown, so the result can be sent back to
   * the model as the tool message.
   *
   * @param tc The tool call requested by the model.
   * @returns The tool's result, or `{ error }` describing why it did not run or failed.
   */
  private async executeTool(tc: ToolCallRequest): Promise<unknown> {
    const tool = this.config.tools.getByName(tc.name);
    if (!tool) {
      const errMsg = `Unknown tool: ${tc.name}`;
      this.emit({ type: 'tool.failed', toolCallId: tc.id, error: errMsg, durationMs: 0, timestamp: now() });
      return { error: errMsg };
    }

    // Parse input
    let input: unknown;
    try {
      const raw = JSON.parse(tc.arguments);
      input = tool.inputSchema.parse(raw);
    } catch (err) {
      const errMsg = `Invalid tool input: ${err instanceof Error ? err.message : String(err)}`;
      this.emit({ type: 'tool.failed', toolCallId: tc.id, error: errMsg, durationMs: 0, timestamp: now() });
      return { error: errMsg };
    }

    // Policy check
    if (this.config.policy.isBlocked(tool.name)) {
      this.emit({ type: 'tool.denied', toolCallId: tc.id, reason: 'Tool is blocked by policy.', timestamp: now() });
      return { error: 'Tool blocked by policy.' };
    }

    const needsApproval = this.config.policy.requiresApproval(tool.permission, tool.name);
    const record: ToolCallRecord = {
      id: tc.id,
      toolId: tool.id,
      toolName: tool.name,
      input,
      status: needsApproval ? 'awaiting-approval' : 'queued',
    };

    this.emit({ type: 'tool.requested', toolCall: record, timestamp: now() });

    // NOTE(review): when policy requires approval but no approvalHandler is
    // configured, the tool runs without approval. Confirm this fail-open
    // behaviour is intended (e.g. for headless runs) before relying on it.
    if (needsApproval && this.config.approvalHandler) {
      const approved = await this.config.approvalHandler(record);
      if (!approved) {
        this.emit({ type: 'tool.denied', toolCallId: tc.id, reason: 'User denied.', timestamp: now() });
        return { error: 'Tool call denied by user.' };
      }
      this.emit({ type: 'tool.approved', toolCallId: tc.id, timestamp: now() });
    }

    // Execute
    this.emit({ type: 'tool.started', toolCallId: tc.id, timestamp: now() });
    const startMs = Date.now();

    const ctx: ToolContext = {
      sessionId: this.state.id,
      workDir: process.cwd(),
      signal: this.abortController.signal,
      emit: (msg) => this.emit({ type: 'tool.progress', toolCallId: tc.id, message: msg, timestamp: now() }),
    };

    let retries = 0;
    while (retries <= tool.retries) {
      try {
        const result = await this.withTimeout(tool.execute(input, ctx), tool.timeout);
        // Duration is measured from the first attempt, so it includes retries.
        const durationMs = Date.now() - startMs;
        this.emit({ type: 'tool.finished', toolCallId: tc.id, result, durationMs, timestamp: now() });
        this.config.metrics.record({ timestamp: now(), type: 'tool-call', toolName: tool.name, durationMs, success: true });
        return result;
      } catch (err) {
        retries++;
        // Give up when attempts are exhausted, or immediately once the
        // session has been cancelled (retrying would only do unwanted work).
        if (retries > tool.retries || this.abortController.signal.aborted) {
          const durationMs = Date.now() - startMs;
          const errMsg = err instanceof Error ? err.message : String(err);
          this.emit({ type: 'tool.failed', toolCallId: tc.id, error: errMsg, durationMs, timestamp: now() });
          this.config.metrics.record({
            timestamp: now(),
            type: 'tool-call',
            toolName: tool.name,
            durationMs,
            success: false,
            error: errMsg,
          });
          return { error: errMsg };
        }
        this.config.metrics.record({ timestamp: now(), type: 'retry', toolName: tool.name, durationMs: 0, success: false });
      }
    }

    // Only reachable if the loop body never runs (e.g. a negative retry count).
    return { error: 'Unexpected tool execution path.' };
  }

  /**
   * Marks the session paused. A `run()` in progress stops looping after the
   * current turn and returns.
   */
  pause(): void {
    this.state.status = 'paused';
  }

  /**
   * Flips a paused session back to `running`. This only changes the status;
   * it does not restart a `run()` loop that has already returned.
   */
  resume(): void {
    if (this.state.status === 'paused') this.state.status = 'running';
  }

  /**
   * Aborts in-flight work via the shared abort signal and marks an unfinished
   * session as failed. A session that already completed or failed keeps its
   * status.
   */
  cancel(): void {
    this.abortController.abort();
    if (this.state.status === 'running' || this.state.status === 'paused') {
      this.state.status = 'failed';
    }
  }

  /** Returns the live session state (not a copy), typed as read-only. */
  getState(): Readonly<SessionState> {
    return this.state;
  }
}