// File: ai-harness/src/core/runtime/index.ts
// Author: stevenkhan
// Commit 908562b (verified): Initial AI Harness - production-grade model-agnostic CLI agent runtime
// ─── Runtime ────────────────────────────────────────────────────────────────
// The heart of the harness: manages session state, orchestration, tool execution,
// budgets, retries, and structured planner/executor/evaluator roles.
import { nanoid } from 'nanoid';
import { EventBus, now, type HarnessEvent, type PlanItem, type TokenUsage, type ToolCallRecord } from '../events/index.js';
import type { ProviderAdapter, ModelInvocation, Message, ToolCallRequest, ModelResult } from '../provider/index.js';
import { ToolRegistry, type ToolDef, type ToolContext } from '../tools/index.js';
import { SkillRegistry } from '../skills/index.js';
import { PolicyEngine } from '../policy/index.js';
import { MetricsCollector } from '../observability/index.js';
import { ArtifactStore } from '../artifacts/index.js';
import { Evaluator, type EvalContext } from '../evaluators/index.js';
// ─── Session State ──────────────────────────────────────────────────────────
/**
 * Mutable record of one harness session.
 *
 * A `Runtime` creates this in its constructor, updates it while `run()` is
 * executing, and hands it back to the caller when `run()` returns.
 */
export interface SessionState {
  /** Unique session identifier (generated with `nanoid()` by `Runtime`). */
  id: string;
  /** The user's goal; sent to the model as the first user message. */
  goal: string;
  /** Plan items for the session. `Runtime` initialises this to an empty list. */
  plan: PlanItem[];
  /** Full conversation so far: system, user, assistant and tool messages. */
  messages: Message[];
  /** IDs of artifacts associated with the session. */
  artifacts: string[];
  /** Identifier of the provider adapter serving this session. */
  provider: string;
  /** Model name passed to the provider on every invocation. */
  model: string;
  /** Names of the skills active for this session. */
  skills: string[];
  /** Running totals accumulated from the usage reported by each model call. */
  budgetUsed: {
    tokens: number;
    costUsd: number;
  };
  /** Lifecycle status of the session. */
  status: 'running' | 'completed' | 'failed' | 'paused';
  /** Number of model-call retries consumed so far. */
  retries: number;
  /** Upper bound on `retries` before a model-call failure becomes fatal. */
  maxRetries: number;
  /** Creation timestamp, as produced by the events module's `now()`. */
  createdAt: string;
}
/**
 * Everything a `Runtime` needs to drive a session: the model provider, the
 * collaborating registries/services, the prompt, and optional limits.
 */
export interface RuntimeConfig {
  /** Adapter used to invoke the model. */
  provider: ProviderAdapter;
  /** Model name forwarded to the provider on each invocation. */
  model: string;
  /** Registry the runtime uses to list tools for the model and look them up by name. */
  tools: ToolRegistry;
  /** Registry that builds the instruction text for the active skills. */
  skills: SkillRegistry;
  /** Policy consulted for budget checks, blocked tools and approval requirements. */
  policy: PolicyEngine;
  /** Sink for model-call, tool-call and retry metrics. */
  metrics: MetricsCollector;
  /** Store whose artifacts are handed to the evaluator after completion. */
  artifacts: ArtifactStore;
  /** Evaluator run once a session reaches `completed`. */
  evaluator: Evaluator;
  /** Bus on which every harness event is emitted. */
  eventBus: EventBus;
  /** Base system prompt; skill instructions are appended after it. */
  systemPrompt: string;
  /** Names of the skills to activate for the session. */
  activeSkills: string[];
  /** Model-call retry budget. `Runtime` defaults this to 3. */
  maxRetries?: number;
  /** Maximum number of loop turns. `Runtime` defaults this to 20. */
  maxTurns?: number;
  /** Token budget; `Runtime` reports it as the `limit` of `budget.warning` events. */
  budgetTokens?: number;
  /** Cost budget in USD. NOTE(review): not read by `Runtime` in this file; presumably enforced by the policy engine — confirm. */
  budgetCostUsd?: number;
  /** Asked to approve a tool call when the policy requires approval; resolving `false` denies it. */
  approvalHandler?: (toolCall: ToolCallRecord) => Promise<boolean>;
}
// ─── Runtime ────────────────────────────────────────────────────────────────
/**
 * Drives a single agent session against one provider/model.
 *
 * `run()` alternates model calls and tool executions until the model answers
 * without requesting tools, a limit is hit, or the session is paused or
 * cancelled. Progress is reported through the configured event bus and
 * metrics collector.
 */
export class Runtime {
  /** Mutable session record; returned by `run()` and exposed by `getState()`. */
  private state: SessionState;
  private config: RuntimeConfig;
  /** Session-wide abort signal handed to the provider and to every tool. */
  private abortController = new AbortController();
  /** Turns started so far; a failed model call still consumes a turn. */
  private turn = 0;

  /**
   * @param config Provider, registries, services and limits for the session.
   * @param goal   The user's goal; becomes the first user message.
   */
  constructor(config: RuntimeConfig, goal: string) {
    this.config = config;
    this.state = {
      id: nanoid(),
      goal,
      plan: [],
      messages: [],
      artifacts: [],
      provider: config.provider.id,
      model: config.model,
      skills: config.activeSkills,
      budgetUsed: { tokens: 0, costUsd: 0 },
      status: 'running',
      retries: 0,
      maxRetries: config.maxRetries ?? 3,
      createdAt: now(),
    };
  }

  get sessionId(): string {
    return this.state.id;
  }

  get status(): string {
    return this.state.status;
  }

  /** Forward an event to the configured event bus. */
  private emit(event: HarnessEvent): void {
    this.config.eventBus.emit(event);
  }

  /**
   * Execute the session loop.
   *
   * The conversation is (re)built from the system prompt, the active skills'
   * instructions and the goal each time this method is called.
   *
   * Outcomes:
   * - `completed`: the model replied without tool calls. The evaluator runs
   *   and `session.completed` is emitted.
   * - `failed`: model retries were exhausted, the session was cancelled, the
   *   budget check failed, the turn limit was reached, or something threw.
   *   `session.failed` is emitted with the reason.
   * - `paused`: `pause()` was called. The loop stops before the next turn and
   *   no terminal event is emitted, because the session neither completed
   *   nor failed.
   *
   * @returns The session state. This method does not reject; unexpected
   *          errors are reported as a `failed` session.
   */
  async run(): Promise<SessionState> {
    this.emit({ type: 'session.started', sessionId: this.state.id, goal: this.state.goal, timestamp: now() });

    // Build system message
    const skillInstructions = this.config.skills.buildInstructions(this.config.activeSkills);
    const systemMsg: Message = {
      role: 'system',
      content: [this.config.systemPrompt, skillInstructions].filter(Boolean).join('\n\n---\n\n'),
    };
    this.state.messages = [systemMsg, { role: 'user', content: this.state.goal }];

    const maxTurns = this.config.maxTurns ?? 20;
    // Why the loop stopped without completing; reported in `session.failed`.
    let failureReason: string | undefined;

    try {
      while (this.state.status === 'running' && this.turn < maxTurns) {
        this.turn++;

        // Budget check
        const budgetCheck = this.config.policy.checkBudget(this.state.budgetUsed);
        if (!budgetCheck.ok) {
          this.emit({
            type: 'budget.warning',
            usage: {
              promptTokens: 0,
              completionTokens: 0,
              totalTokens: this.state.budgetUsed.tokens,
              estimatedCostUsd: this.state.budgetUsed.costUsd,
            },
            limit: this.config.budgetTokens ?? 0,
            timestamp: now(),
          });
          failureReason = 'Budget exhausted.';
          break;
        }

        // Invoke model
        const toolDefs = this.config.tools.listForModel();
        const invocation: ModelInvocation = {
          model: this.config.model,
          messages: this.state.messages,
          tools: toolDefs,
          signal: this.abortController.signal,
        };
        this.emit({ type: 'model.request.start', provider: this.config.provider.id, model: this.config.model, timestamp: now() });
        const startMs = Date.now();
        let result: ModelResult;
        try {
          result = await this.config.provider.invoke(invocation);
        } catch (err) {
          const errMsg = err instanceof Error ? err.message : String(err);
          this.emit({ type: 'error', message: `Model call failed: ${errMsg}`, timestamp: now() });
          // Retrying with an already-aborted signal cannot succeed, so a
          // cancelled session goes straight to the failure path.
          const cancelled = this.abortController.signal.aborted;
          if (!cancelled && this.state.retries < this.state.maxRetries) {
            this.state.retries++;
            continue;
          }
          this.state.status = 'failed';
          failureReason = `Model call failed: ${errMsg}`;
          break;
        }
        const durationMs = Date.now() - startMs;
        this.state.budgetUsed.tokens += result.usage.totalTokens;
        this.state.budgetUsed.costUsd += result.usage.estimatedCostUsd ?? 0;
        this.emit({ type: 'model.request.end', provider: this.config.provider.id, model: this.config.model, usage: result.usage, durationMs, timestamp: now() });
        this.config.metrics.record({ timestamp: now(), type: 'model-call', provider: this.config.provider.id, model: this.config.model, durationMs, usage: result.usage, success: true });

        // Handle assistant response
        if (result.content) {
          this.emit({ type: 'model.stream.end', fullText: result.content, timestamp: now() });
        }
        this.state.messages.push({
          role: 'assistant',
          content: result.content,
          toolCalls: result.toolCalls,
        });

        // If no tool calls, we're done
        if (!result.toolCalls || result.toolCalls.length === 0) {
          this.state.status = 'completed';
          break;
        }

        // Execute tool calls in the order the model requested them.
        for (const tc of result.toolCalls) {
          // Once cancelled, do not start further tools (they may have side effects).
          if (this.abortController.signal.aborted) break;
          const toolResult = await this.executeTool(tc);
          this.state.messages.push({
            role: 'tool',
            content: this.serializeToolResult(toolResult),
            toolCallId: tc.id,
          });
        }
      }

      // Read through the string-typed getter: `pause()`/`cancel()` may have
      // changed the status while the loop was awaiting.
      const outcome = this.status;
      if (outcome === 'completed') {
        // Run evaluation
        this.emit({ type: 'evaluation.started', timestamp: now() });
        const evalCtx: EvalContext = {
          sessionId: this.state.id,
          goal: this.state.goal,
          artifacts: this.config.artifacts.list().map((a) => ({ path: a.path, content: a.content, type: a.type })),
          assistantOutput: this.state.messages.filter((m) => m.role === 'assistant').map((m) => m.content).join('\n'),
          workDir: process.cwd(),
        };
        const report = await this.config.evaluator.evaluate(evalCtx);
        this.emit({ type: 'evaluation.completed', report, timestamp: now() });
        this.emit({ type: 'session.completed', sessionId: this.state.id, summary: `Completed in ${this.turn} turns.`, timestamp: now() });
      } else if (outcome !== 'paused') {
        // Either already 'failed', or still 'running' because the budget or
        // the turn limit stopped the loop. Neither is a completed session.
        this.state.status = 'failed';
        const reason = this.abortController.signal.aborted
          ? 'Session cancelled.'
          : failureReason ?? `Turn limit of ${maxTurns} reached before the goal was completed.`;
        this.emit({ type: 'session.failed', sessionId: this.state.id, error: reason, timestamp: now() });
      }
    } catch (err) {
      const errMsg = err instanceof Error ? err.message : String(err);
      this.state.status = 'failed';
      this.emit({ type: 'session.failed', sessionId: this.state.id, error: errMsg, timestamp: now() });
    }
    return this.state;
  }

  /**
   * Turn a tool result into the string content of a `tool` message.
   *
   * Strings pass through unchanged. Everything else is JSON-encoded.
   * `JSON.stringify` returns `undefined` for `undefined`/functions and throws
   * on circular structures or BigInt values, so both cases are mapped to a
   * well-formed string instead of producing an empty message or failing the
   * whole session.
   */
  private serializeToolResult(result: unknown): string {
    if (typeof result === 'string') return result;
    try {
      return JSON.stringify(result) ?? 'null';
    } catch (err) {
      const errMsg = err instanceof Error ? err.message : String(err);
      return JSON.stringify({ error: `Tool result could not be serialized: ${errMsg}` });
    }
  }

  /**
   * Resolve, validate, authorise and execute one tool call.
   *
   * Tool-level failures (unknown tool, invalid input, policy block, denial,
   * timeout, thrown error) are reported as events and returned as
   * `{ error: string }` so the model can react to them.
   *
   * @param tc The tool call requested by the model; `arguments` is a JSON string.
   * @returns The tool's result, or `{ error }` describing why it did not run.
   */
  private async executeTool(tc: ToolCallRequest): Promise<unknown> {
    const tool = this.config.tools.getByName(tc.name);
    if (!tool) {
      const errMsg = `Unknown tool: ${tc.name}`;
      this.emit({ type: 'tool.failed', toolCallId: tc.id, error: errMsg, durationMs: 0, timestamp: now() });
      return { error: errMsg };
    }

    // Parse input: JSON-decode, then validate against the tool's schema.
    let input: unknown;
    try {
      const raw = JSON.parse(tc.arguments);
      input = tool.inputSchema.parse(raw);
    } catch (err) {
      const errMsg = `Invalid tool input: ${err instanceof Error ? err.message : String(err)}`;
      this.emit({ type: 'tool.failed', toolCallId: tc.id, error: errMsg, durationMs: 0, timestamp: now() });
      return { error: errMsg };
    }

    // Policy check
    if (this.config.policy.isBlocked(tool.name)) {
      this.emit({ type: 'tool.denied', toolCallId: tc.id, reason: 'Tool is blocked by policy.', timestamp: now() });
      return { error: 'Tool blocked by policy.' };
    }

    const needsApproval = this.config.policy.requiresApproval(tool.permission, tool.name);
    const record: ToolCallRecord = {
      id: tc.id,
      toolId: tool.id,
      toolName: tool.name,
      input,
      status: needsApproval ? 'awaiting-approval' : 'queued',
    };
    this.emit({ type: 'tool.requested', toolCall: record, timestamp: now() });

    if (needsApproval) {
      // Fail closed: with nobody to ask, a call that requires approval must
      // not run.
      if (!this.config.approvalHandler) {
        this.emit({ type: 'tool.denied', toolCallId: tc.id, reason: 'Approval required but no approval handler is configured.', timestamp: now() });
        return { error: 'Tool call requires approval, but no approval handler is configured.' };
      }
      const approved = await this.config.approvalHandler(record);
      if (!approved) {
        this.emit({ type: 'tool.denied', toolCallId: tc.id, reason: 'User denied.', timestamp: now() });
        return { error: 'Tool call denied by user.' };
      }
      this.emit({ type: 'tool.approved', toolCallId: tc.id, timestamp: now() });
    }

    // Execute
    this.emit({ type: 'tool.started', toolCallId: tc.id, timestamp: now() });
    // Measured once, so reported durations span all attempts.
    const startMs = Date.now();
    const ctx: ToolContext = {
      sessionId: this.state.id,
      workDir: process.cwd(),
      signal: this.abortController.signal,
      emit: (msg) => this.emit({ type: 'tool.progress', toolCallId: tc.id, message: msg, timestamp: now() }),
    };

    let attempts = 0;
    while (attempts <= tool.retries) {
      let timer: ReturnType<typeof setTimeout> | undefined;
      try {
        const result = await Promise.race([
          tool.execute(input, ctx),
          new Promise<never>((_, reject) => {
            timer = setTimeout(() => reject(new Error('Tool timeout')), tool.timeout);
          }),
        ]);
        const durationMs = Date.now() - startMs;
        this.emit({ type: 'tool.finished', toolCallId: tc.id, result, durationMs, timestamp: now() });
        this.config.metrics.record({ timestamp: now(), type: 'tool-call', toolName: tool.name, durationMs, success: true });
        return result;
      } catch (err) {
        attempts++;
        if (attempts > tool.retries) {
          const durationMs = Date.now() - startMs;
          const errMsg = err instanceof Error ? err.message : String(err);
          this.emit({ type: 'tool.failed', toolCallId: tc.id, error: errMsg, durationMs, timestamp: now() });
          this.config.metrics.record({ timestamp: now(), type: 'tool-call', toolName: tool.name, durationMs, success: false, error: errMsg });
          return { error: errMsg };
        }
        this.config.metrics.record({ timestamp: now(), type: 'retry', toolName: tool.name, durationMs: 0, success: false });
      } finally {
        // Without this, the pending timer of a finished attempt keeps the
        // event loop alive for up to `tool.timeout` ms.
        clearTimeout(timer);
      }
    }
    return { error: 'Unexpected tool execution path.' };
  }

  /** Ask the loop to stop before its next turn; the in-flight turn finishes first. */
  pause(): void {
    this.state.status = 'paused';
  }

  /** Flip a paused session back to `running`. Does not itself re-enter the loop. */
  resume(): void {
    if (this.state.status === 'paused') this.state.status = 'running';
  }

  /** Abort in-flight provider/tool work via the shared signal and mark the session failed. */
  cancel(): void {
    this.abortController.abort();
    this.state.status = 'failed';
  }

  /** Current session state (the live object, not a copy). */
  getState(): Readonly<SessionState> {
    return this.state;
  }
}