stevenkhan committed 6 days ago

Commit

908562b

verified ·

1 Parent(s): a008404

Initial AI Harness - production-grade model-agnostic CLI agent runtime

Browse files

Files changed (37) hide show

EXTENSION_GUIDE.md +206 -0
README.md +133 -17
package.json +42 -0
src/cli/commands/chat.ts +62 -0
src/cli/commands/config.ts +27 -0
src/cli/commands/providers.ts +23 -0
src/cli/commands/run.ts +96 -0
src/cli/commands/skills.ts +19 -0
src/cli/commands/tools.ts +24 -0
src/cli/index.ts +65 -0
src/cli/renderers/index.ts +208 -0
src/cli/state/factory.ts +25 -0
src/cli/state/provider-resolver.ts +27 -0
src/core/artifacts/index.ts +48 -0
src/core/evaluators/index.ts +75 -0
src/core/events/index.ts +129 -0
src/core/index.ts +10 -0
src/core/observability/index.ts +82 -0
src/core/policy/index.ts +55 -0
src/core/provider/index.ts +99 -0
src/core/runtime/index.ts +279 -0
src/core/skills/index.ts +39 -0
src/core/tools/index.ts +113 -0
src/providers/anthropic/index.ts +159 -0
src/providers/gemini/index.ts +130 -0
src/providers/index.ts +5 -0
src/providers/openai/index.ts +148 -0
src/providers/openrouter/index.ts +48 -0
src/skills/coding/index.ts +36 -0
src/skills/docs/index.ts +37 -0
src/skills/index.ts +4 -0
src/skills/research/index.ts +33 -0
src/tools/fs/index.ts +73 -0
src/tools/index.ts +4 -0
src/tools/shell/index.ts +50 -0
src/tools/web/index.ts +46 -0
tsconfig.json +31 -0

EXTENSION_GUIDE.md ADDED Viewed

	@@ -0,0 +1,206 @@

+# Extension Guide
+This guide explains how to extend AI Harness with new providers, tools, skills, evaluator checks, and renderers.
+## Adding a New Provider
+1. Create `src/providers/your-provider/index.ts`
+2. Implement the `ProviderAdapter` interface:
+```typescript
+import type { ProviderAdapter, ProviderConfig, ModelInfo, ModelCapability, ModelInvocation, ModelResult, ModelStreamEvent } from '../../core/provider/index.js';
+export class YourProvider implements ProviderAdapter {
+  id = 'your-provider';
+  label = 'Your Provider';
+  constructor(config: ProviderConfig) {
+    // Store API key, base URL, etc.
+  }
+  async listModels(): Promise<ModelInfo[]> {
+    // Return available models with capabilities and pricing
+  }
+  supports(capability: ModelCapability): boolean {
+    // Return true for supported capabilities
+  }
+  async invoke(input: ModelInvocation): Promise<ModelResult> {
+    // Make a non-streaming request, return parsed result
+  }
+  async *stream(input: ModelInvocation): AsyncIterable<ModelStreamEvent> {
+    // Yield streaming events: text-delta, tool-call-start, tool-call-delta, tool-call-end, finish
+  }
+}
+```
+3. Register in `src/cli/state/provider-resolver.ts`:
+```typescript
+case 'your-provider':
+  return new YourProvider({ id: 'your-provider', ...config });
+```
+### Key requirements:
+- Normalize all message formats to the common `Message` type
+- Handle tool calling format differences internally
+- Emit proper `TokenUsage` in results (even if estimated)
+- Support `AbortSignal` for cancellation
+- Handle rate limits and retries internally
+---
+## Adding a New Tool
+1. Create `src/tools/your-category/index.ts`
+2. Define the tool with full typing:
+```typescript
+import { z } from 'zod';
+import type { ToolDef } from '../../core/tools/index.js';
+export const yourTool: ToolDef<
+  { param1: string; param2?: number },
+  { result: string }
+> = {
+  id: 'category.action',
+  name: 'your_tool_name',         // This is what the model sees
+  description: 'Clear, concise description of what this tool does.',
+  inputSchema: z.object({
+    param1: z.string().describe('What this parameter does'),
+    param2: z.number().optional().describe('Optional parameter'),
+  }),
+  outputSchema: z.object({
+    result: z.string(),
+  }),
+  permission: 'read',             // read | write | exec | network | dangerous
+  sideEffect: 'none',            // none | filesystem | network | process | mixed
+  timeout: 10000,                 // ms
+  retries: 1,                     // number of automatic retries on failure
+  tags: ['your-category'],
+  renderer: { icon: '🔧', color: 'blue' },
+  async execute(input, ctx) {
+    ctx.emit('Starting execution...');  // Progress updates
+    // Do the work
+    return { result: 'done' };
+  },
+};
+```
+3. Register in the runtime setup (e.g., `src/cli/commands/run.ts`):
+```typescript
+import { yourTool } from '../../tools/your-category/index.js';
+tools.register(yourTool);
+```
+### Permission levels:
+| Level | Meaning | Default policy |
+|-------|---------|----------------|
+| `read` | Only reads state | Auto-approved |
+| `write` | Modifies files/state | Confirm in `confirm-writes` mode |
+| `exec` | Runs processes | Confirm in `confirm-writes` mode |
+| `network` | Makes network requests | Confirm in `confirm-network` mode |
+| `dangerous` | Destructive/irreversible | Always requires approval |
+---
+## Adding a New Skill
+1. Create `src/skills/your-skill/index.ts`
+```typescript
+import type { SkillModule } from '../../core/skills/index.js';
+export const yourSkill: SkillModule = {
+  id: 'your-skill',
+  title: 'Your Skill Title',
+  description: 'One-line description.',
+  suggestedTools: ['tool_name_1', 'tool_name_2'],
+  tags: ['tag1', 'tag2'],
+  instructions: `Detailed multi-line instructions that will be injected into the system prompt when this skill is active.
+## Section 1
+- Rule 1
+- Rule 2
+## Section 2
+- Rule 3`,
+};
+```
+2. Register in the skill registry and export from `src/skills/index.ts`.
+### Tips:
+- Keep instructions focused and actionable
+- Reference specific tool names the model should use
+- Include both "do" and "don't" rules
+- Structure with markdown headings for readability
+---
+## Adding an Evaluator Check
+```typescript
+import type { EvalCheck } from '../../core/evaluators/index.js';
+export const yourCheck: EvalCheck = {
+  name: 'your-check-name',
+  async run(ctx) {
+    // ctx.goal — the original task goal
+    // ctx.assistantOutput — all assistant messages concatenated
+    // ctx.artifacts — generated artifacts
+    // ctx.workDir — working directory
+    const passed = /* your logic */;
+    return {
+      passed,
+      message: passed ? undefined : 'Explanation of what failed',
+    };
+  },
+};
+// Register:
+evaluator.addCheck(yourCheck);
+```
+### Common check patterns:
+- **Schema validation** — parse output with Zod
+- **File existence** — verify expected files were created
+- **Test execution** — run `npm test` and check exit code
+- **Content matching** — verify output contains required elements
+- **Length/quality** — check response isn't too short or repetitive
+---
+## Custom Renderer
+The `EventRenderer` class in `src/cli/renderers/index.ts` handles all terminal output. To customize:
+1. Subclass or modify `EventRenderer`
+2. Add cases for event types you want to render differently
+3. Use the box-drawing utilities for structured output
+```typescript
+class CustomRenderer extends EventRenderer {
+  override render(event: HarnessEvent): void {
+    if (event.type === 'your.custom.event') {
+      // Custom rendering
+      return;
+    }
+    super.render(event);
+  }
+}
+```
+---
+## Adding Custom Events
+1. Add your event type to the `HarnessEvent` union in `src/core/events/index.ts`
+2. Emit it via `eventBus.emit({ type: 'your.event', ... })`
+3. Handle it in the renderer
+The event system is intentionally open — any component can emit events, and any number of listeners can consume them.

README.md CHANGED Viewed

@@ -1,26 +1,142 @@
----
-tags:
-- ml-intern
----
-# stevenkhan/ai-harness
-<!-- ml-intern-provenance -->
-## Generated by ML Intern
-This model repository was generated by [ML Intern](https://github.com/huggingface/ml-intern), an agent for machine learning research and development on the Hugging Face Hub.
-- Try ML Intern: https://smolagents-ml-intern.hf.space
-- Source code: https://github.com/huggingface/ml-intern
-## Usage
-```python
-from transformers import AutoModelForCausalLM, AutoTokenizer
-model_id = "stevenkhan/ai-harness"
-tokenizer = AutoTokenizer.from_pretrained(model_id)
-model = AutoModelForCausalLM.from_pretrained(model_id)
 ```
-For non-causal architectures, replace `AutoModelForCausalLM` with the appropriate `AutoModel` class.

+# AI Harness
+A production-grade, model-agnostic CLI harness for agentic AI workflows.
+```
+╭─────────────────────────────────────╮
+│  ⚡ AI Harness  v0.1.0              │
+│  model-agnostic CLI agent runtime   │
+╰─────────────────────────────────────╯
+```
+## What is this?
+A terminal-first agent runtime. Not a toy chatbot. It supports:
+- **Multiple LLM providers** — OpenAI, Anthropic, Gemini, OpenRouter, any OpenAI-compatible endpoint
+- **Typed tool calling** — Zod-validated inputs/outputs, permissions, retries, timeouts
+- **Modular skills** — Attachable instruction packs per task
+- **Structured runtime** — Planner/executor/evaluator roles, budgets, loop detection
+- **Beautiful CLI output** — Streaming, spinners, panels, event timeline, metrics
+- **Observability** — Token usage, cost tracking, latency, success rates
+- **Evaluation** — Schema checks, rubric scoring, remediation loops
+- **Artifact handling** — Files, patches, logs, export to Markdown/JSON
+- **Safety & permissions** — Read/write/exec/network/dangerous levels with policy modes
+## Quick Start
+```bash
+# Install dependencies
+pnpm install
+# Build
+pnpm build
+# Interactive chat
+pnpm chat
+# Autonomous task
+node dist/cli/index.js run "refactor the auth module to use JWT"
+# List providers/models
+node dist/cli/index.js providers
+# List tools
+node dist/cli/index.js tools
+# List skills
+node dist/cli/index.js skills
+```
+## Configuration
+Set provider API keys via environment variables:
+```bash
+export OPENAI_API_KEY="sk-..."
+export ANTHROPIC_API_KEY="sk-ant-..."
+export GEMINI_API_KEY="AI..."
+export OPENROUTER_API_KEY="sk-or-..."
+```
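+Override defaults with CLI flags (`harness` below is shorthand for `node dist/cli/index.js`; `package.json` defines no `bin` entry, so create a shell alias or link the script yourself):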
+```bash
+harness chat --provider openai --model gpt-4o --skills coding research --verbose
+harness run "build a REST API" --provider anthropic --model claude-sonnet-4-20250514 --budget-tokens 100000
+```
+## Commands
+| Command | Description |
+|---------|-------------|
+| `harness chat` | Interactive multi-turn chat |
+| `harness run <goal>` | Autonomous task execution |
+| `harness providers` | List providers and models |
+| `harness tools` | List available tools |
+| `harness skills` | List available skills |
+| `harness config` | Show configuration |
+## Architecture
+```
+src/
+  core/
+    events/        — Event types, EventBus
+    provider/      — ProviderAdapter interface, message types
+    runtime/       — Session state, orchestration loop
+    tools/         — ToolRegistry, ToolDef, permissions
+    skills/        — SkillRegistry, SkillModule
+    evaluators/    — Evaluation checks, EvalReport
+    artifacts/     — ArtifactStore, export
+    policy/        — PolicyEngine, permission enforcement
+    observability/ — MetricsCollector, MetricEntry
+  providers/
+    openai/        — OpenAI adapter
+    anthropic/     — Anthropic adapter
+    gemini/        — Google Gemini adapter
+    openrouter/    — OpenRouter + OpenAI-compatible adapter
+  tools/
+    fs/            — read_file, write_file, list_directory
+    shell/         — shell_exec
+    web/           — web_fetch
+  skills/
+    coding/        — Software engineering instructions
+    research/      — Research & analysis instructions
+    docs/          — Technical writing instructions
+  cli/
+    index.ts       — Commander entry point
+    commands/      — chat, run, providers, tools, skills, config
+    renderers/     — EventRenderer, Spinner, box drawing, metrics
+    state/         — Provider resolver, runtime factory
 ```
+## Key Design Decisions
+### Event-driven architecture
+Everything flows through `EventBus`. Rendering, logging, metrics collection, and export all subscribe to the same event stream. This means you can add a new consumer (e.g., a web dashboard) without touching core logic.
+### Provider normalization
+All providers implement `ProviderAdapter` with `invoke()` and `stream()`. Message format, tool calling conventions, and response parsing are handled per-provider so the runtime never sees vendor-specific shapes.
+### Typed tools with Zod
+Every tool declares its input/output schemas with Zod. The runtime validates inputs before execution and can generate JSON Schema for model function-calling automatically.
+### Policy enforcement
+The `PolicyEngine` checks permission levels against the current policy mode before executing any tool. Denied tools return structured error messages to the model so it can adapt.
+### Evaluation loop
+After task completion, the `Evaluator` runs all registered checks. Failed checks can trigger remediation (retry with error context), preventing premature success declarations.
+## Extending
+See [EXTENSION_GUIDE.md](./EXTENSION_GUIDE.md) for detailed instructions on adding:
+- New providers
+- New tools
+- New skills
+- New evaluator checks
+- Custom renderers
+## License
+MIT

package.json ADDED Viewed

	@@ -0,0 +1,42 @@

+{
+  "name": "ai-harness",
+  "version": "0.1.0",
+  "private": true,
+  "type": "module",
+  "description": "Production-grade, model-agnostic CLI harness for agentic AI workflows",
+  "engines": { "node": ">=20.0.0" },
+  "scripts": {
+    "build": "tsc -b",
+    "dev": "tsc -b --watch",
+    "start": "node dist/cli/index.js",
+    "chat": "node dist/cli/index.js chat",
+    "run": "node dist/cli/index.js run",
+    "test": "vitest"
+  },
+  "dependencies": {
+    "zod": "^3.23.0",
+    "commander": "^12.1.0",
+    "chalk": "^5.3.0",
+    "ora": "^8.0.0",
+    "marked": "^12.0.0",
+    "marked-terminal": "^7.0.0",
+    "cli-table3": "^0.6.5",
+    "log-update": "^6.0.0",
+    "conf": "^13.0.0",
+    "better-sqlite3": "^11.0.0",
+    "nanoid": "^5.0.0",
+    "openai": "^4.52.0",
+    "@anthropic-ai/sdk": "^0.24.0",
+    "@google/generative-ai": "^0.12.0",
+    "eventsource-parser": "^1.1.0",
+    "undici": "^6.19.0",
+    "figures": "^6.1.0",
+    "boxen": "^8.0.0"
+  },
+  "devDependencies": {
+    "typescript": "^5.5.0",
+    "@types/node": "^20.14.0",
+    "@types/better-sqlite3": "^7.6.0",
+    "vitest": "^1.6.0"
+  }
+}

src/cli/commands/chat.ts ADDED Viewed

	@@ -0,0 +1,62 @@

+// ─── Chat Command ───────────────────────────────────────────────────────────
+import { createInterface } from 'readline';
+import { createRuntime } from '../state/factory.js';
+import { EventRenderer } from '../renderers/index.js';
+import type { Message } from '../../core/provider/index.js';
+import { now, type HarnessEvent } from '../../core/events/index.js';
+/**
+ * Interactive multi-turn chat loop behind the `chat` CLI command.
+ *
+ * Reads one line at a time from stdin, streams the provider's reply to stdout
+ * and keeps the conversation in memory. Typing `/quit` or `/exit` leaves the
+ * loop, after which the process exits with code 0.
+ *
+ * @param opts - Parsed CLI options; forwarded unchanged to `createRuntime`.
+ *   When `model` is omitted, the first model listed by the provider is used.
+ */
+export async function chatCommand(opts: {
+  provider: string;
+  model?: string;
+  skills: string[];
+  verbose?: boolean;
+  compact?: boolean;
+}): Promise<void> {
+  const renderer = new EventRenderer({ verbose: opts.verbose ?? false, compact: opts.compact ?? false });
+  const { eventBus, provider } = await createRuntime(opts);
+  eventBus.on((event) => renderer.render(event));
+  // Resolve the model once, up front: every turn and every emitted event then
+  // reports the same, real model id instead of the placeholder 'default'.
+  let resolvedModel: string | undefined = opts.model;
+  if (resolvedModel === undefined) {
+    try {
+      resolvedModel = (await provider.listModels())[0]?.id;
+    } catch (err) {
+      const errMsg = err instanceof Error ? err.message : String(err);
+      console.error(`\x1b[31mError: could not list models for ${provider.label}: ${errMsg}\x1b[0m`);
+      process.exit(1);
+    }
+  }
+  if (resolvedModel === undefined) {
+    console.error(`\x1b[31mError: ${provider.label} reported no models; pass --model explicitly.\x1b[0m`);
+    process.exit(1);
+  }
+  const model: string = resolvedModel;
+  const rl = createInterface({ input: process.stdin, output: process.stdout });
+  const prompt = () => new Promise<string>((resolve) => rl.question('\x1b[36m❯\x1b[0m ', resolve));
+  console.log(`\x1b[90mProvider: ${provider.label} | Type your message (Ctrl+C to exit)\x1b[0m\n`);
+  const messages: Message[] = [{ role: 'system', content: 'You are a helpful AI assistant with access to tools.' }];
+  while (true) {
+    const input = await prompt();
+    const trimmed = input.trim();
+    if (!trimmed) continue;
+    if (trimmed === '/quit' || trimmed === '/exit') break;
+    messages.push({ role: 'user', content: input });
+    // NOTE(review): the renderer starts a spinner on this event and only stops it on
+    // 'model.request.end'; text written directly to stdout below can be repainted over
+    // while it runs, and a failed turn never emits the end event — confirm and fix in the renderer.
+    eventBus.emit({ type: 'model.request.start', provider: provider.id, model, timestamp: now() });
+    const startMs = Date.now();
+    let answered = false;
+    try {
+      // No tool definitions are sent in chat mode (the original passed `undefined` as well).
+      for await (const event of provider.stream({ model, messages })) {
+        if (event.type === 'text-delta') {
+          process.stdout.write(event.text);
+        } else if (event.type === 'finish') {
+          // Terminate the streamed line first, so anything the renderer does on the
+          // end event (clearing the spinner line, printing stats) cannot erase the reply's tail.
+          if (!event.result.content.endsWith('\n')) process.stdout.write('\n');
+          const durationMs = Date.now() - startMs;
+          eventBus.emit({ type: 'model.request.end', provider: provider.id, model, usage: event.result.usage, durationMs, timestamp: now() });
+          messages.push({ role: 'assistant', content: event.result.content });
+          answered = true;
+        }
+      }
+    } catch (err) {
+      const errMsg = err instanceof Error ? err.message : String(err);
+      console.error(`\x1b[31mError: ${errMsg}\x1b[0m`);
+    }
+    // A turn without an assistant reply would leave two consecutive user messages in the
+    // history on the next request; drop the unanswered one so roles keep alternating.
+    if (!answered) messages.pop();
+    console.log();
+  }
+  rl.close();
+  console.log('\x1b[90mGoodbye.\x1b[0m');
+  process.exit(0);
+}

src/cli/commands/config.ts ADDED Viewed

	@@ -0,0 +1,27 @@

+// ─── Config Command ──────────────────────────────────────────────────────────
+/**
+ * Prints the harness configuration as pretty-printed JSON.
+ *
+ * Provider, model, approval mode and budgets are read from `HARNESS_*`
+ * environment variables with fixed fallbacks; the remaining entries are
+ * static values. `opts` is accepted for the CLI signature but not consulted.
+ */
+export async function configCommand(opts: { show?: boolean }): Promise<void> {
+  // Value of an environment variable, or the given fallback label when unset.
+  const fromEnv = (name: string, fallback: string): string => process.env[name] ?? fallback;
+  const budget = {
+    maxTokens: fromEnv('HARNESS_BUDGET_TOKENS', 'unlimited'),
+    maxCostUsd: fromEnv('HARNESS_BUDGET_COST', 'unlimited'),
+  };
+  const tools = { filesystem: true, shell: true, webFetch: true };
+  const observability = { level: 'standard', saveTraces: true };
+  const config = {
+    provider: fromEnv('HARNESS_PROVIDER', 'anthropic'),
+    model: fromEnv('HARNESS_MODEL', 'auto (first from provider)'),
+    approvalMode: fromEnv('HARNESS_APPROVAL', 'confirm-writes'),
+    budget,
+    skills: ['coding', 'research', 'docs'],
+    tools,
+    observability,
+  };
+  const report = [
+    '\n\x1b[1mCurrent Configuration:\x1b[0m\n',
+    JSON.stringify(config, null, 2),
+    '\n\x1b[90mSet via environment variables (HARNESS_PROVIDER, HARNESS_MODEL, etc.) or --flags.\x1b[0m\n',
+  ];
+  for (const section of report) console.log(section);
+}

src/cli/commands/providers.ts ADDED Viewed

	@@ -0,0 +1,23 @@

+// ─── Providers Command ──────────────────────────────────────────────────────
+import { resolveProvider } from '../state/provider-resolver.js';
+/**
+ * Formats per-million-token pricing for display.
+ *
+ * A price of 0 is a real price (free model) and is shown as such; only a
+ * missing input price yields "pricing unknown". A missing output price is
+ * simply omitted instead of being rendered as "$undefined".
+ */
+function formatPricing(inputCost: number | null | undefined, outputCost: number | null | undefined): string {
+  if (inputCost == null) return 'pricing unknown';
+  if (outputCost == null) return `$${inputCost}/M in`;
+  return `$${inputCost}/M in, $${outputCost}/M out`;
+}
+/**
+ * Lists every known provider together with the models it reports.
+ *
+ * Providers are queried one after another so the output order is stable.
+ * A provider that cannot be resolved or queried is listed as "not configured",
+ * followed by the underlying error message so the cause is visible.
+ */
+export async function providersCommand(): Promise<void> {
+  const providerIds = ['openai', 'anthropic', 'gemini', 'openrouter'];
+  console.log('\n\x1b[1mAvailable Providers:\x1b[0m\n');
+  for (const id of providerIds) {
+    try {
+      const provider = resolveProvider(id);
+      const models = await provider.listModels();
+      console.log(`  \x1b[36m${provider.label}\x1b[0m (${id})`);
+      for (const model of models) {
+        const cost = formatPricing(model.costPerMillionInput, model.costPerMillionOutput);
+        console.log(`    • ${model.name} \x1b[90m(${model.id}) — ${(model.contextWindow / 1000).toFixed(0)}K ctx — ${cost}\x1b[0m`);
+      }
+      console.log();
+    } catch (err: unknown) {
+      // Best-effort listing: keep going, but surface why this provider is unavailable.
+      const reason = err instanceof Error ? err.message : String(err);
+      const detail = reason ? ` \x1b[90m(${reason})\x1b[0m` : '';
+      console.log(`  \x1b[33m${id}\x1b[0m — not configured${detail}\n`);
+    }
+  }
+}

src/cli/commands/run.ts ADDED Viewed

	@@ -0,0 +1,96 @@

+// ─── Run Command ────────────────────────────────────────────────────────────
+import { createRuntime } from '../state/factory.js';
+import { EventRenderer, renderMetrics } from '../renderers/index.js';
+import { Runtime, type RuntimeConfig } from '../../core/runtime/index.js';
+import { EventBus } from '../../core/events/index.js';
+import { ToolRegistry } from '../../core/tools/index.js';
+import { SkillRegistry } from '../../core/skills/index.js';
+import { PolicyEngine } from '../../core/policy/index.js';
+import { MetricsCollector } from '../../core/observability/index.js';
+import { ArtifactStore } from '../../core/artifacts/index.js';
+import { Evaluator, outputNotEmptyCheck, hasArtifactsCheck } from '../../core/evaluators/index.js';
+import { readFileTool, writeFileTool, listDirTool } from '../../tools/fs/index.js';
+import { shellExecTool } from '../../tools/shell/index.js';
+import { webFetchTool } from '../../tools/web/index.js';
+import { codingSkill } from '../../skills/coding/index.js';
+import { researchSkill } from '../../skills/research/index.js';
+import { docsSkill } from '../../skills/docs/index.js';
+import { resolveProvider } from '../state/provider-resolver.js';
+/**
+ * Parses a numeric CLI option that must be strictly positive.
+ *
+ * @param flag - Flag name used in the error message (e.g. `--max-turns`).
+ * @param raw - Raw string from the command line; `undefined` or `''` means "not given".
+ * @param integerOnly - When true, fractional values are rejected.
+ * @returns The parsed number, or `undefined` when the option was not given.
+ * @throws Error when the value is not a finite positive number (or not an integer when required).
+ */
+function parsePositiveOption(flag: string, raw: string | undefined, integerOnly: boolean): number | undefined {
+  if (raw === undefined || raw === '') return undefined;
+  // Number() rejects trailing garbage ("20abc") that parseInt would silently accept.
+  const value = Number(raw);
+  if (!Number.isFinite(value) || value <= 0 || (integerOnly && !Number.isInteger(value))) {
+    throw new Error(`${flag} expects a positive ${integerOnly ? 'integer' : 'number'}, got "${raw}"`);
+  }
+  return value;
+}
+/**
+ * Executes one autonomous task for the `run` CLI command.
+ *
+ * Wires up the event bus, provider, tools, skills, policy, metrics, artifact
+ * store and evaluator, runs the `Runtime` for `goal`, prints the metrics
+ * summary and exits with code 0 when the session status is `completed`,
+ * otherwise 1. Invalid numeric options or a provider without models are
+ * reported and end the process with code 1 before anything is executed.
+ *
+ * @param goal - Task description handed to the runtime.
+ * @param opts - Parsed CLI options; numeric options arrive as strings.
+ */
+export async function runCommand(goal: string, opts: {
+  provider: string;
+  model?: string;
+  skills: string[];
+  maxTurns?: string;
+  budgetTokens?: string;
+  budgetCost?: string;
+  approval?: string;
+  verbose?: boolean;
+  compact?: boolean;
+}): Promise<void> {
+  // Validate numeric options first so a typo never reaches the runtime as NaN.
+  let maxTurns: number;
+  let budgetTokens: number | undefined;
+  let budgetCostUsd: number | undefined;
+  try {
+    maxTurns = parsePositiveOption('--max-turns', opts.maxTurns, true) ?? 20;
+    budgetTokens = parsePositiveOption('--budget-tokens', opts.budgetTokens, true);
+    budgetCostUsd = parsePositiveOption('--budget-cost', opts.budgetCost, false);
+  } catch (err: unknown) {
+    console.error(`\x1b[31mError: ${err instanceof Error ? err.message : String(err)}\x1b[0m`);
+    process.exit(1);
+  }
+  const renderer = new EventRenderer({ verbose: opts.verbose ?? false, compact: opts.compact ?? false });
+  // Setup
+  const eventBus = new EventBus();
+  eventBus.on((event) => renderer.render(event));
+  const provider = resolveProvider(opts.provider);
+  const model = opts.model ?? (await provider.listModels())[0]?.id;
+  if (model === undefined) {
+    console.error(`\x1b[31mError: ${provider.label} reported no models; pass --model explicitly.\x1b[0m`);
+    process.exit(1);
+  }
+  // Tools
+  const tools = new ToolRegistry();
+  tools.register(readFileTool);
+  tools.register(writeFileTool);
+  tools.register(listDirTool);
+  tools.register(shellExecTool);
+  tools.register(webFetchTool);
+  // Skills
+  const skills = new SkillRegistry();
+  skills.register(codingSkill);
+  skills.register(researchSkill);
+  skills.register(docsSkill);
+  // Policy
+  // NOTE(review): the approval mode string is passed through unchecked; validate it
+  // against the PolicyEngine's accepted modes once that type is available here.
+  const policy = new PolicyEngine({
+    mode: (opts.approval as any) ?? 'confirm-writes',
+  });
+  // Metrics
+  const metrics = new MetricsCollector();
+  // Artifacts
+  const artifacts = new ArtifactStore();
+  // Evaluator
+  const evaluator = new Evaluator();
+  evaluator.addCheck(outputNotEmptyCheck);
+  // Runtime config
+  const config: RuntimeConfig = {
+    provider,
+    model,
+    tools,
+    skills,
+    policy,
+    metrics,
+    artifacts,
+    evaluator,
+    eventBus,
+    systemPrompt: `You are an AI agent executing tasks autonomously. You have tools available. Complete the goal thoroughly, verify your work, and report results.`,
+    activeSkills: opts.skills,
+    maxTurns,
+    budgetTokens,
+    budgetCostUsd,
+  };
+  // Execute
+  const runtime = new Runtime(config, goal);
+  const state = await runtime.run();
+  // Summary
+  const summary = metrics.summarize(state.id);
+  renderMetrics(summary);
+  process.exit(state.status === 'completed' ? 0 : 1);
+}

src/cli/commands/skills.ts ADDED Viewed

	@@ -0,0 +1,19 @@

+// ─── Skills Command ──────────────────────────────────────────────────────────
+import { codingSkill } from '../../skills/coding/index.js';
+import { researchSkill } from '../../skills/research/index.js';
+import { docsSkill } from '../../skills/docs/index.js';
+/**
+ * Prints the built-in skills: id and title, description, and — when a skill
+ * declares any — the tools it suggests. Each entry is followed by a blank line.
+ */
+export async function skillsCommand(): Promise<void> {
+  console.log('\n\x1b[1mAvailable Skills:\x1b[0m\n');
+  [codingSkill, researchSkill, docsSkill].forEach((entry) => {
+    const rows = [
+      `  \x1b[35m${entry.id}\x1b[0m — ${entry.title}`,
+      `    ${entry.description}`,
+    ];
+    const suggested = entry.suggestedTools;
+    if (suggested?.length) {
+      rows.push(`    \x1b[90mTools: ${suggested.join(', ')}\x1b[0m`);
+    }
+    for (const row of rows) console.log(row);
+    console.log();
+  });
+}

src/cli/commands/tools.ts ADDED Viewed

	@@ -0,0 +1,24 @@

+// ─── Tools Command ──────────────────────────────────────────────────────────
+import { ToolRegistry } from '../../core/tools/index.js';
+import { readFileTool, writeFileTool, listDirTool } from '../../tools/fs/index.js';
+import { shellExecTool } from '../../tools/shell/index.js';
+import { webFetchTool } from '../../tools/web/index.js';
+/**
+ * Prints every built-in tool with its permission level (color-coded),
+ * description, timeout, retry count and side-effect class.
+ */
+export async function toolsCommand(): Promise<void> {
+  const registry = new ToolRegistry();
+  for (const builtin of [readFileTool, writeFileTool, listDirTool, shellExecTool, webFetchTool]) {
+    registry.register(builtin);
+  }
+  // ANSI color code per permission level; anything not listed falls back to red ('31').
+  const colorByPermission: Record<string, string> = {
+    read: '32',
+    write: '33',
+    exec: '31',
+    network: '35',
+  };
+  console.log('\n\x1b[1mAvailable Tools:\x1b[0m\n');
+  registry.list().forEach((entry) => {
+    const permColor = colorByPermission[entry.permission] ?? '31';
+    const glyph = entry.renderer?.icon ?? '🔧';
+    console.log(`  ${glyph} \x1b[1m${entry.name}\x1b[0m \x1b[${permColor}m[${entry.permission}]\x1b[0m`);
+    console.log(`    ${entry.description}`);
+    console.log(`    \x1b[90mtimeout: ${entry.timeout}ms | retries: ${entry.retries} | side-effect: ${entry.sideEffect}\x1b[0m`);
+    console.log();
+  });
+}

src/cli/index.ts ADDED Viewed

	@@ -0,0 +1,65 @@

+// ─── CLI Entry Point ────────────────────────────────────────────────────────
+import { Command } from 'commander';
+import { chatCommand } from './commands/chat.js';
+import { runCommand } from './commands/run.js';
+import { providersCommand } from './commands/providers.js';
+import { toolsCommand } from './commands/tools.js';
+import { skillsCommand } from './commands/skills.js';
+import { configCommand } from './commands/config.js';
+import { renderBanner } from './renderers/index.js';
+// Command-line definition: one sub-command per feature, each delegating to its handler.
+const program = new Command();
+program
+  .name('harness')
+  .description('Production-grade, model-agnostic AI agent CLI')
+  .version('0.1.0');
+program
+  .command('chat')
+  .description('Interactive chat with the AI agent')
+  .option('-p, --provider <provider>', 'Provider to use', 'anthropic')
+  .option('-m, --model <model>', 'Model to use')
+  .option('-s, --skills <skills...>', 'Skills to load', ['coding'])
+  .option('--verbose', 'Show detailed event information')
+  .option('--compact', 'Minimal output mode')
+  .action(chatCommand);
+program
+  .command('run <goal>')
+  .description('Run an autonomous task with the given goal')
+  .option('-p, --provider <provider>', 'Provider to use', 'anthropic')
+  .option('-m, --model <model>', 'Model to use')
+  .option('-s, --skills <skills...>', 'Skills to load', ['coding'])
+  .option('--max-turns <n>', 'Maximum turns', '20')
+  .option('--budget-tokens <n>', 'Token budget')
+  .option('--budget-cost <n>', 'Cost budget in USD')
+  .option('--approval <mode>', 'Approval mode', 'confirm-writes')
+  .option('--verbose', 'Show detailed event information')
+  .option('--compact', 'Minimal output mode')
+  .action(runCommand);
+program
+  .command('providers')
+  .description('List available providers and models')
+  .action(providersCommand);
+program
+  .command('tools')
+  .description('List available tools')
+  .action(toolsCommand);
+program
+  .command('skills')
+  .description('List available skills')
+  .action(skillsCommand);
+program
+  .command('config')
+  .description('Show or edit configuration')
+  .option('--show', 'Show current config')
+  .action(configCommand);
+// Show banner and parse
+renderBanner();
+// All action handlers are async, so use parseAsync: a rejected handler is then
+// reported as a clean error with exit code 1 instead of an unhandled rejection.
+program.parseAsync().catch((err: unknown) => {
+  const message = err instanceof Error ? err.message : String(err);
+  console.error(`\x1b[31mError: ${message}\x1b[0m`);
+  process.exit(1);
+});

src/cli/renderers/index.ts ADDED Viewed

	@@ -0,0 +1,208 @@

+// ─── CLI Renderer ───────────────────────────────────────────────────────────
+// Beautiful terminal output: streaming, panels, events, spinners, markdown.
+import type { HarnessEvent } from '../../core/events/index.js';
+// ─── ANSI color helpers (no dependency needed for basic colors) ──────────────
+/** Builds an ANSI SGR escape sequence for the given code. */
+const esc = (code: string) => `\x1b[${code}m`;
+/** Sequence that resets all styling. */
+const reset = esc('0');
+// Each helper wraps a string in one style and resets afterwards.
+const bold = (s: string) => `${esc('1')}${s}${reset}`;
+const dim = (s: string) => `${esc('2')}${s}${reset}`;
+const green = (s: string) => `${esc('32')}${s}${reset}`;
+const yellow = (s: string) => `${esc('33')}${s}${reset}`;
+const blue = (s: string) => `${esc('34')}${s}${reset}`;
+const magenta = (s: string) => `${esc('35')}${s}${reset}`;
+const cyan = (s: string) => `${esc('36')}${s}${reset}`;
+const red = (s: string) => `${esc('31')}${s}${reset}`;
+const gray = (s: string) => `${esc('90')}${s}${reset}`;
+// ─── Box Drawing ────────────────────────────────────────────────────────────
+const BOX = { tl: '╭', tr: '╮', bl: '╰', br: '╯', h: '─', v: '│' };
+/**
+ * Truncates `s` to at most `maxVisible` printable characters.
+ *
+ * ANSI style sequences are zero-width: they are never counted and are always
+ * copied through — including those after the cut — so a style opened before
+ * the cut is still closed by its reset.
+ */
+function clipVisible(s: string, maxVisible: number): string {
+  const styleSeq = /\x1b\[[0-9;]*m/y; // sticky: only matches at lastIndex
+  let out = '';
+  let visible = 0;
+  let i = 0;
+  while (i < s.length) {
+    styleSeq.lastIndex = i;
+    const seq = styleSeq.exec(s);
+    if (seq) {
+      out += seq[0];
+      i += seq[0].length;
+      continue;
+    }
+    if (visible < maxVisible) {
+      out += s.charAt(i);
+      visible++;
+    }
+    i++;
+  }
+  return out;
+}
+/**
+ * Renders `content` inside a rounded box with `title` embedded in the top border.
+ *
+ * Every row (top border, body rows, bottom border) is exactly `width` printable
+ * columns wide, provided the title fits. Body lines longer than the inner width
+ * are truncated by printable characters, so embedded ANSI styling is neither
+ * counted nor cut in half.
+ *
+ * @param title - Plain-text title shown in the top border.
+ * @param content - Body text; split into rows on `\n`.
+ * @param color - Style applied to the border characters (defaults to cyan).
+ * @param width - Total box width in columns (defaults to 72).
+ * @returns The box as a single newline-joined string.
+ */
+function box(title: string, content: string, color: (s: string) => string = cyan, width = 72): string {
+  const innerW = width - 4;
+  const titleStr = ` ${title} `;
+  // The top row has one leading border dash before the title, so it needs
+  // innerW + 1 - title columns of fill to line up with the other rows.
+  const topLen = Math.max(0, innerW + 1 - titleStr.length);
+  const top = color(`${BOX.tl}${BOX.h}${titleStr}${BOX.h.repeat(topLen)}${BOX.tr}`);
+  const bot = color(`${BOX.bl}${BOX.h.repeat(innerW + 2)}${BOX.br}`);
+  const lines = content.split('\n').map((l) => {
+    const clipped = clipVisible(l, innerW);
+    const pad = ' '.repeat(Math.max(0, innerW - stripAnsi(clipped).length));
+    return `${color(BOX.v)} ${clipped}${pad} ${color(BOX.v)}`;
+  });
+  return [top, ...lines, bot].join('\n');
+}
+/** Removes ANSI style sequences, leaving only the printable text. */
+function stripAnsi(s: string): string {
+  return s.replace(/\x1b\[[0-9;]*m/g, '');
+}
+// ─── Spinner ────────────────────────────────────────────────────────────────
+const SPINNER_FRAMES = ['⠋', '⠙', '⠹', '⠸', '⠼', '⠴', '⠦', '⠧', '⠇', '⠏'];
+/**
+ * Single-line terminal spinner that repaints itself every 80 ms using a
+ * carriage return. Only one animation runs at a time.
+ */
+export class Spinner {
+  private frame = 0;
+  private interval: ReturnType<typeof setInterval> | null = null;
+  private message = '';
+  /** Printable width of the last painted line, used to erase leftovers. */
+  private paintedWidth = 0;
+  /** Starts (or restarts) the animation with the given message. */
+  start(message: string): void {
+    // Restart instead of stacking a second timer that could never be cleared.
+    if (this.interval) clearInterval(this.interval);
+    this.message = message;
+    this.interval = setInterval(() => this.paint(), 80);
+  }
+  /** Changes the message shown from the next repaint on. */
+  update(message: string): void {
+    this.message = message;
+  }
+  /**
+   * Stops the animation and clears its line, then prints `finalMessage` if given.
+   * When no animation is running nothing is cleared, so text already on the
+   * current line is left untouched.
+   */
+  stop(finalMessage?: string): void {
+    if (this.interval) {
+      clearInterval(this.interval);
+      this.interval = null;
+      const clearWidth = Math.max(this.paintedWidth, this.message.length + 4);
+      process.stdout.write(`\r${' '.repeat(clearWidth)}\r`);
+      this.paintedWidth = 0;
+    }
+    if (finalMessage) console.log(finalMessage);
+  }
+  /** Advances one frame and repaints, blanking any tail left by a longer previous message. */
+  private paint(): void {
+    this.frame = (this.frame + 1) % SPINNER_FRAMES.length;
+    const width = this.message.length + 2; // frame glyph + space + message
+    const tail = ' '.repeat(Math.max(0, this.paintedWidth - width));
+    process.stdout.write(`\r${cyan(SPINNER_FRAMES[this.frame]!)} ${this.message}${tail}`);
+    this.paintedWidth = width;
+  }
+}
+// ─── Event Renderer ─────────────────────────────────────────────────────────
+export interface RendererOptions {
+  verbose: boolean;
+  compact: boolean;
+}
+export class EventRenderer {
+  private opts: RendererOptions;
+  private spinner = new Spinner();
+  private streamBuffer = '';
+  constructor(opts: Partial<RendererOptions> = {}) {
+    this.opts = { verbose: false, compact: false, ...opts };
+  }
+  render(event: HarnessEvent): void {
+    switch (event.type) {
+      case 'session.started':
+        console.log('\n' + box('Session', `${bold('Goal:')} ${event.goal}\n${gray(`ID: ${event.sessionId}`)}`, cyan));
+        break;
+      case 'session.completed':
+        this.flushStream();
+        console.log('\n' + green(`✓ ${bold('Session completed')} — ${event.summary}`));
+        break;
+      case 'session.failed':
+        this.flushStream();
+        console.log('\n' + red(`✗ ${bold('Session failed')} — ${event.error}`));
+        break;
+      case 'plan.updated':
+        if (!this.opts.compact) {
+          const planStr = event.items.map((item) => {
+            const icon = item.status === 'completed' ? green('✓') : item.status === 'in_progress' ? yellow('▶') : item.status === 'failed' ? red('✗') : gray('○');
+            return `  ${icon} ${item.title}`;
+          }).join('\n');
+          console.log('\n' + box('Plan', planStr, magenta));
+        }
+        break;
+      case 'model.request.start':
+        this.spinner.start(`${event.provider}/${event.model} thinking…`);
+        break;
+      case 'model.request.end':
+        this.spinner.stop();
+        if (this.opts.verbose) {
+          console.log(gray(`  ⏱ ${event.durationMs}ms | ${event.usage.totalTokens} tokens | ~$${(event.usage.estimatedCostUsd ?? 0).toFixed(4)}`));
+        }
+        break;
+      case 'model.stream.delta':
+        this.streamBuffer += event.text;
+        process.stdout.write(event.text);
+        break;
+      case 'model.stream.end':
+        this.flushStream();
+        break;
+      case 'tool.requested':
+        console.log(`\n${blue('⚡')} ${bold(event.toolCall.toolName)} ${gray(`[${event.toolCall.id.slice(0, 8)}]`)}`);
+        if (this.opts.verbose) {
+          console.log(gray(`  Input: ${JSON.stringify(event.toolCall.input).slice(0, 200)}`));
+        }
+        break;
+      case 'tool.started':
+        this.spinner.start('Tool executing…');
+        break;
+      case 'tool.progress':
+        this.spinner.update(event.message);
+        break;
+      case 'tool.finished':
+        this.spinner.stop(green(`  ✓ Done`) + (this.opts.verbose ? gray(` (${event.durationMs}ms)`) : ''));
+        break;
+      case 'tool.failed':
+        this.spinner.stop(red(`  ✗ Failed: ${event.error}`));
+        break;
+      case 'tool.denied':
+        console.log(yellow(`  ⚠ Denied: ${event.reason}`));
+        break;
+      case 'evaluation.completed':
+        const r = event.report;
+        const icon = r.passed ? green('✓') : red('✗');
+        const checksStr = r.checks.map((c) => `  ${c.passed ? green('✓') : red('✗')} ${c.name}${c.message ? gray(` — ${c.message}`) : ''}`).join('\n');
+        console.log('\n' + box('Evaluation', `${icon} ${r.summary}\n${checksStr}`, r.passed ? green : red));
+        break;
+      case 'artifact.created':
+        console.log(`${magenta('📎')} Artifact: ${bold(event.artifact.title)} ${gray(`(${event.artifact.type})`)}`);
+        break;
+      case 'budget.warning':
+        console.log(yellow(`⚠ Budget warning: ${event.usage.totalTokens} tokens used`));
+        break;
+      case 'error':
+        console.log(red(`✗ Error: ${event.message}`));
+        break;
+    }
+  }
+  /**
+   * Ends the current run of streamed model text.
+   * If anything was streamed and it did not finish with a newline, one is
+   * written so later output starts on a fresh line; the buffer is then reset.
+   */
+  private flushStream(): void {
+    // Nothing streamed since the last flush — leave the terminal untouched.
+    if (!this.streamBuffer) return;
+    const alreadyTerminated = this.streamBuffer.endsWith('\n');
+    if (!alreadyTerminated) process.stdout.write('\n');
+    this.streamBuffer = '';
+  }
+}
+// ─── Header / Banner ────────────────────────────────────────────────────────
+/**
+ * Prints the startup banner to stdout: a bordered box containing the product
+ * name, the hard-coded version string `v0.1.0`, and the tagline.
+ * The box is laid out with literal spaces, so any change to the text inside it
+ * must be matched by a change to the padding.
+ */
+export function renderBanner(): void {
+  console.log(`
+${cyan(bold('╭─────────────────────────────────────╮'))}
+${cyan(bold('│'))}  ${bold('⚡ AI Harness')}  ${gray('v0.1.0')}              ${cyan(bold('│'))}
+${cyan(bold('│'))}  ${dim('model-agnostic CLI agent runtime')}  ${cyan(bold('│'))}
+${cyan(bold('╰─────────────────────────────────────╯'))}
+`);
+}
+// ─── Metrics Summary ────────────────────────────────────────────────────────
+/**
+ * Prints a boxed summary of session metrics to stdout.
+ *
+ * @param metrics Aggregated counters for the session. `toolSuccessRate` is a
+ *   fraction (shown as a rounded percentage) and `totalDurationMs` is shown in
+ *   seconds with one decimal place.
+ */
+export function renderMetrics(metrics: {
+  modelCalls: number; toolCalls: number; totalTokens: number;
+  estimatedCostUsd: number; totalDurationMs: number; toolSuccessRate: number;
+}): void {
+  const successPercent = Math.round(metrics.toolSuccessRate * 100);
+  const durationSeconds = (metrics.totalDurationMs / 1000).toFixed(1);
+  const rows: string[] = [];
+  rows.push(`${bold('Model calls:')}    ${metrics.modelCalls}`);
+  rows.push(`${bold('Tool calls:')}     ${metrics.toolCalls} (${successPercent}% success)`);
+  rows.push(`${bold('Total tokens:')}   ${metrics.totalTokens.toLocaleString()}`);
+  rows.push(`${bold('Est. cost:')}      $${metrics.estimatedCostUsd.toFixed(4)}`);
+  rows.push(`${bold('Duration:')}       ${durationSeconds}s`);
+  console.log('\n' + box('Metrics', rows.join('\n'), gray));
+}

src/cli/state/factory.ts ADDED Viewed

	@@ -0,0 +1,25 @@

+// ─── Runtime Factory ─────────────────────────────────────────────────────────
+import { EventBus } from '../../core/events/index.js';
+import type { ProviderAdapter } from '../../core/provider/index.js';
+import { resolveProvider } from './provider-resolver.js';
+/** Options accepted by `createRuntime`. */
+export interface CreateRuntimeOpts {
+  // Provider id handed to resolveProvider.
+  provider: string;
+  // Model id; when omitted, createRuntime uses the provider's first listed model.
+  model?: string;
+  // NOTE(review): skills, verbose and compact are not read by createRuntime in this file.
+  skills: string[];
+  verbose?: boolean;
+  compact?: boolean;
+}
+/**
+ * Builds the objects a chat session needs: a fresh event bus, the resolved
+ * provider adapter, and the model id to use.
+ *
+ * When `opts.model` is not given, the provider is asked for its model list and
+ * the first entry's id is used; if the list is empty the model is `'unknown'`.
+ * The returned `runtime` is always `null`.
+ */
+export async function createRuntime(opts: CreateRuntimeOpts) {
+  const eventBus = new EventBus();
+  const provider = resolveProvider(opts.provider);
+  let model = opts.model;
+  if (model == null) {
+    // Only hit the provider when the caller did not pin a model.
+    const [firstModel] = await provider.listModels();
+    model = firstModel?.id ?? 'unknown';
+  }
+  return {
+    runtime: null, // Chat mode doesn't use full runtime
+    eventBus,
+    provider,
+    model,
+  };
+}

src/cli/state/provider-resolver.ts ADDED Viewed

	@@ -0,0 +1,27 @@

+// ─── Provider Resolver ───────────────────────────────────────────────────────
+import type { ProviderAdapter } from '../../core/provider/index.js';
+import { OpenAIProvider } from '../../providers/openai/index.js';
+import { AnthropicProvider } from '../../providers/anthropic/index.js';
+import { GeminiProvider } from '../../providers/gemini/index.js';
+import { OpenAICompatibleProvider, OpenRouterProvider } from '../../providers/openrouter/index.js';
+/**
+ * Maps a provider id to a provider adapter instance.
+ *
+ * The ids `openai`, `anthropic`, `gemini` and `openrouter` select their
+ * dedicated adapters. Any other id is treated as an OpenAI-compatible endpoint
+ * whose base URL and API key come from `config` or, failing that, from the
+ * environment variables `<ID>_BASE_URL` / `<ID>_API_KEY`.
+ *
+ * The environment prefix is the upper-cased id with every character outside
+ * `A-Z`, `0-9` and `_` replaced by `_`, so `my-llm` reads `MY_LLM_API_KEY`.
+ * The raw upper-cased name (`MY-LLM_API_KEY`) is still consulted as a fallback
+ * for setups that relied on the previous lookup.
+ *
+ * @param id Provider identifier.
+ * @param config Optional explicit credentials/endpoint; takes precedence over the environment.
+ * @returns The adapter for the requested provider.
+ */
+export function resolveProvider(id: string, config?: { apiKey?: string; baseUrl?: string }): ProviderAdapter {
+  switch (id) {
+    case 'openai':
+      return new OpenAIProvider({ id: 'openai', ...config });
+    case 'anthropic':
+      return new AnthropicProvider({ id: 'anthropic', ...config });
+    case 'gemini':
+      return new GeminiProvider({ id: 'gemini', ...config });
+    case 'openrouter':
+      return new OpenRouterProvider({ ...config });
+    default: {
+      // Treat as OpenAI-compatible endpoint
+      const legacyPrefix = id.toUpperCase();
+      // Hyphens, dots, etc. are not usable in most shells' variable names.
+      const envPrefix = legacyPrefix.replace(/[^A-Z0-9_]/g, '_');
+      const fromEnv = (suffix: string): string | undefined =>
+        process.env[`${envPrefix}_${suffix}`] ?? process.env[`${legacyPrefix}_${suffix}`];
+      // NOTE(review): the `as any` hides a mismatch with the adapter's config type — confirm and tighten.
+      return new OpenAICompatibleProvider({
+        id,
+        label: id,
+        baseUrl: config?.baseUrl ?? fromEnv('BASE_URL'),
+        apiKey: config?.apiKey ?? fromEnv('API_KEY'),
+      } as any);
+    }
+  }
+}

src/core/artifacts/index.ts ADDED Viewed

	@@ -0,0 +1,48 @@

+// ─── Artifacts ──────────────────────────────────────────────────────────────
+// First-class artifact persistence and retrieval.
+import type { ArtifactRecord } from '../events/index.js';
+/**
+ * In-memory, insertion-ordered collection of artifact records with Markdown
+ * and JSON export.
+ */
+export class ArtifactStore {
+  private artifacts: ArtifactRecord[] = [];
+  /** Appends an artifact; ids are not checked for uniqueness. */
+  add(artifact: ArtifactRecord): void {
+    this.artifacts.push(artifact);
+  }
+  /** Returns the first artifact with the given id, or `undefined`. */
+  get(id: string): ArtifactRecord | undefined {
+    return this.artifacts.find((a) => a.id === id);
+  }
+  /** Returns a shallow copy of the artifact list. */
+  list(): ArtifactRecord[] {
+    return [...this.artifacts];
+  }
+  /** Returns the artifacts whose `type` equals the given type. */
+  listByType(type: ArtifactRecord['type']): ArtifactRecord[] {
+    return this.artifacts.filter((a) => a.type === type);
+  }
+  /**
+   * Renders all artifacts as a Markdown document.
+   * At most the first 2000 characters of each artifact's content are included,
+   * inside a fenced code block.
+   */
+  exportMarkdown(): string {
+    if (!this.artifacts.length) return '# Artifacts\n\nNo artifacts generated.\n';
+    let md = '# Artifacts\n\n';
+    for (const a of this.artifacts) {
+      md += `## ${a.title}\n\n`;
+      md += `- **Type:** ${a.type}\n`;
+      md += `- **Created:** ${a.createdAt}\n`;
+      if (a.path) md += `- **Path:** \`${a.path}\`\n`;
+      if (a.content) {
+        const excerpt = a.content.slice(0, 2000);
+        // The fence must be longer than any backtick run in the excerpt,
+        // otherwise content containing ``` would close the block early.
+        const backtickRuns = excerpt.match(/`+/g) ?? [];
+        const longestRun = backtickRuns.reduce((max, run) => Math.max(max, run.length), 0);
+        const fence = '`'.repeat(Math.max(3, longestRun + 1));
+        md += `\n${fence}\n${excerpt}\n${fence}\n`;
+      }
+      md += '\n---\n\n';
+    }
+    return md;
+  }
+  /** Serializes all artifacts as pretty-printed JSON (2-space indent). */
+  exportJson(): string {
+    return JSON.stringify(this.artifacts, null, 2);
+  }
+  /** Removes every stored artifact. */
+  clear(): void {
+    this.artifacts = [];
+  }
+}

src/core/evaluators/index.ts ADDED Viewed

	@@ -0,0 +1,75 @@

+// ─── Evaluators ─────────────────────────────────────────────────────────────
+// Structured evaluation hooks: schema checks, tests, rubric scoring.
+import type { EvaluationReport } from '../events/index.js';
+/** A single named pass/fail check that an `Evaluator` can run. */
+export interface EvalCheck {
+  // Copied into the report entry for this check.
+  name: string;
+  // Evaluator.evaluate reports a thrown error as a failed check with a "Check threw: …" message.
+  run(context: EvalContext): Promise<{ passed: boolean; message?: string }>;
+}
+/** Read-only snapshot of a finished session that is handed to every check. */
+export interface EvalContext {
+  sessionId: string;
+  goal: string;
+  artifacts: Array<{ path?: string; content?: string; type: string }>;
+  // Text produced by the assistant; the built-in output check tests it after trimming.
+  assistantOutput: string;
+  workDir: string;
+}
+/**
+ * Runs registered checks in registration order, one at a time, and folds
+ * their outcomes into an `EvaluationReport`.
+ */
+export class Evaluator {
+  private checks: EvalCheck[] = [];
+  /** Registers a check to be run by every later `evaluate` call. */
+  addCheck(check: EvalCheck): void {
+    this.checks.push(check);
+  }
+  /**
+   * Executes every check against `ctx`.
+   * A check that throws is recorded as failed rather than aborting the run.
+   * With no checks registered the report passes with a score of 1.
+   */
+  async evaluate(ctx: EvalContext): Promise<EvaluationReport> {
+    const outcomes: Array<{ name: string; passed: boolean; message?: string }> = [];
+    for (const check of this.checks) {
+      let outcome: { name: string; passed: boolean; message?: string };
+      try {
+        outcome = { name: check.name, ...(await check.run(ctx)) };
+      } catch (err) {
+        const detail = err instanceof Error ? err.message : String(err);
+        outcome = { name: check.name, passed: false, message: `Check threw: ${detail}` };
+      }
+      outcomes.push(outcome);
+    }
+    const total = outcomes.length;
+    let passedCount = 0;
+    for (const outcome of outcomes) {
+      if (outcome.passed) passedCount++;
+    }
+    // Mirrors Array.prototype.every: true only when no outcome is falsy.
+    const passed = outcomes.every((o) => o.passed);
+    const failedCount = outcomes.filter((o) => !o.passed).length;
+    let summary: string;
+    if (passed) {
+      summary = `All ${total} checks passed.`;
+    } else {
+      summary = `${failedCount}/${total} checks failed.`;
+    }
+    return {
+      passed,
+      score: total ? passedCount / total : 1,
+      checks: outcomes,
+      summary,
+    };
+  }
+}
+// ─── Built-in Checks ────────────────────────────────────────────────────────
+/** Built-in check: passes when the assistant output contains non-whitespace text. */
+export const outputNotEmptyCheck: EvalCheck = {
+  name: 'output-not-empty',
+  async run(ctx) {
+    const hasText = ctx.assistantOutput.trim().length > 0;
+    if (hasText) return { passed: true, message: undefined };
+    return { passed: false, message: 'Assistant output is empty.' };
+  },
+};
+/** Built-in check: passes when the session produced at least one artifact. */
+export const hasArtifactsCheck: EvalCheck = {
+  name: 'has-artifacts',
+  async run(ctx) {
+    const producedAny = ctx.artifacts.length > 0;
+    if (producedAny) return { passed: true, message: undefined };
+    return { passed: false, message: 'No artifacts were produced.' };
+  },
+};

src/core/events/index.ts ADDED Viewed

	@@ -0,0 +1,129 @@

+// ─── Core Event Types ───────────────────────────────────────────────────────
+// The internal event model is the backbone of the harness. Rendering, logging,
+// replay, export, and debugging all consume the same event stream.
+import { z } from 'zod';
+// ─── Plan ───────────────────────────────────────────────────────────────────
+/** One step of a session plan, carried by `plan.updated` events. */
+export const PlanItemSchema = z.object({
+  id: z.string(),
+  title: z.string(),
+  status: z.enum(['pending', 'in_progress', 'completed', 'failed', 'skipped']),
+  detail: z.string().optional(),
+});
+export type PlanItem = z.infer<typeof PlanItemSchema>;
+// ─── Tool Call Record ───────────────────────────────────────────────────────
+/** Record of one tool invocation: identity, input, lifecycle status and optional outcome fields. */
+export const ToolCallRecordSchema = z.object({
+  id: z.string(),
+  toolId: z.string(),
+  toolName: z.string(),
+  input: z.unknown(),
+  // Lifecycle state; the Runtime creates records as 'awaiting-approval' or 'queued'.
+  status: z.enum([
+    'queued',
+    'awaiting-approval',
+    'running',
+    'streaming',
+    'success',
+    'failed',
+    'denied',
+    'cancelled',
+  ]),
+  output: z.unknown().optional(),
+  error: z.string().optional(),
+  durationMs: z.number().optional(),
+  startedAt: z.string().optional(),
+  finishedAt: z.string().optional(),
+});
+export type ToolCallRecord = z.infer<typeof ToolCallRecordSchema>;
+// ─── Artifact Record ────────────────────────────────────────────────────────
+/** Metadata (and optionally inline content) of an artifact produced during a session. */
+export const ArtifactRecordSchema = z.object({
+  id: z.string(),
+  type: z.enum(['file', 'patch', 'log', 'screenshot', 'json', 'markdown', 'other']),
+  path: z.string().optional(),
+  title: z.string(),
+  content: z.string().optional(),
+  mimeType: z.string().optional(),
+  // NOTE(review): presumably an ISO-8601 string as produced by now() — confirm at the producers.
+  createdAt: z.string(),
+});
+export type ArtifactRecord = z.infer<typeof ArtifactRecordSchema>;
+// ─── Evaluation Report ──────────────────────────────────────────────────────
+/** Outcome of an evaluation run: overall verdict, optional score, and per-check results. */
+export const EvaluationReportSchema = z.object({
+  passed: z.boolean(),
+  // Constrained to the range 0..1.
+  score: z.number().min(0).max(1).optional(),
+  checks: z.array(z.object({
+    name: z.string(),
+    passed: z.boolean(),
+    message: z.string().optional(),
+  })),
+  summary: z.string().optional(),
+});
+export type EvaluationReport = z.infer<typeof EvaluationReportSchema>;
+// ─── Token Usage ────────────────────────────────────────────────────────────
+/** Token counts for a model call, with an optional cost estimate in USD. */
+export const TokenUsageSchema = z.object({
+  promptTokens: z.number(),
+  completionTokens: z.number(),
+  totalTokens: z.number(),
+  // Optional: not every provider reports a cost.
+  estimatedCostUsd: z.number().optional(),
+});
+export type TokenUsage = z.infer<typeof TokenUsageSchema>;
+// ─── Harness Events (discriminated union) ───────────────────────────────────
+/**
+ * Every event the harness can emit, discriminated by `type`.
+ * All variants carry a `timestamp` string; switch on `type` to access the
+ * variant-specific fields.
+ */
+export type HarnessEvent =
+  | { type: 'session.started'; sessionId: string; goal: string; timestamp: string }
+  | { type: 'session.completed'; sessionId: string; summary: string; timestamp: string }
+  | { type: 'session.failed'; sessionId: string; error: string; timestamp: string }
+  | { type: 'plan.updated'; items: PlanItem[]; timestamp: string }
+  | { type: 'model.request.start'; provider: string; model: string; timestamp: string }
+  | { type: 'model.request.end'; provider: string; model: string; usage: TokenUsage; durationMs: number; timestamp: string }
+  | { type: 'model.stream.delta'; text: string; timestamp: string }
+  | { type: 'model.stream.end'; fullText: string; timestamp: string }
+  | { type: 'tool.requested'; toolCall: ToolCallRecord; timestamp: string }
+  | { type: 'tool.approved'; toolCallId: string; timestamp: string }
+  | { type: 'tool.denied'; toolCallId: string; reason: string; timestamp: string }
+  | { type: 'tool.started'; toolCallId: string; timestamp: string }
+  | { type: 'tool.progress'; toolCallId: string; message: string; timestamp: string }
+  | { type: 'tool.finished'; toolCallId: string; result: unknown; durationMs: number; timestamp: string }
+  | { type: 'tool.failed'; toolCallId: string; error: string; durationMs: number; timestamp: string }
+  | { type: 'evaluation.started'; timestamp: string }
+  | { type: 'evaluation.completed'; report: EvaluationReport; timestamp: string }
+  | { type: 'artifact.created'; artifact: ArtifactRecord; timestamp: string }
+  | { type: 'budget.warning'; usage: TokenUsage; limit: number; timestamp: string }
+  | { type: 'error'; message: string; code?: string; timestamp: string };
+// ─── Event Bus ──────────────────────────────────────────────────────────────
+/** Callback invoked synchronously for every emitted event. */
+export type EventListener = (event: HarnessEvent) => void;
+/**
+ * Synchronous publish/subscribe hub that also keeps every emitted event in an
+ * in-memory history.
+ */
+export class EventBus {
+  private listeners: EventListener[] = [];
+  private history: HarnessEvent[] = [];
+  /**
+   * Subscribes a listener.
+   *
+   * @returns An unsubscribe function. It removes exactly the registration made
+   *   by this call (a listener registered twice keeps its other registration)
+   *   and does nothing when called again.
+   */
+  on(listener: EventListener): () => void {
+    this.listeners.push(listener);
+    let subscribed = true;
+    return () => {
+      if (!subscribed) return;
+      subscribed = false;
+      const index = this.listeners.indexOf(listener);
+      if (index !== -1) {
+        // Build a new array so a dispatch in progress keeps its own view.
+        this.listeners = this.listeners.filter((_, i) => i !== index);
+      }
+    };
+  }
+  /**
+   * Appends the event to the history, then delivers it to the listeners that
+   * were subscribed when the call started. Listeners added during delivery
+   * only see later events. Exceptions thrown by a listener are ignored so one
+   * faulty consumer cannot stop the others.
+   */
+  emit(event: HarnessEvent): void {
+    this.history.push(event);
+    // Snapshot: `on()` pushes into the live array, which would otherwise
+    // extend this loop to listeners registered mid-dispatch.
+    const recipients = [...this.listeners];
+    for (const listener of recipients) {
+      try { listener(event); } catch { /* swallow listener errors */ }
+    }
+  }
+  /** Returns the recorded events in emission order (the live internal array, typed read-only). */
+  getHistory(): readonly HarnessEvent[] {
+    return this.history;
+  }
+  /** Drops the recorded history; subscriptions are unaffected. */
+  clear(): void {
+    this.history = [];
+  }
+}
+/** Returns the current time as an ISO-8601 UTC string, as produced by `Date#toISOString`. */
+export function now(): string {
+  const current = new Date();
+  return current.toISOString();
+}

src/core/index.ts ADDED Viewed

	@@ -0,0 +1,10 @@

+// ─── Core barrel export ─────────────────────────────────────────────────────
+export * from './events/index.js';
+export * from './provider/index.js';
+export * from './tools/index.js';
+export * from './skills/index.js';
+export * from './policy/index.js';
+export * from './observability/index.js';
+export * from './artifacts/index.js';
+export * from './evaluators/index.js';
+export * from './runtime/index.js';

src/core/observability/index.ts ADDED Viewed

	@@ -0,0 +1,82 @@

+// ─── Observability ──────────────────────────────────────────────────────────
+// Captures metrics for model calls, tool calls, retries, token usage, and cost.
+import type { TokenUsage } from '../events/index.js';
+/** One recorded measurement: a model call, a tool call, a retry, or an error. */
+export interface MetricEntry {
+  timestamp: string;
+  type: 'model-call' | 'tool-call' | 'retry' | 'error';
+  provider?: string;
+  model?: string;
+  toolName?: string;
+  durationMs: number;
+  // Only read from 'model-call' entries when summarizing.
+  usage?: TokenUsage;
+  success: boolean;
+  error?: string;
+}
+/** Aggregated metrics for one session, as produced by `MetricsCollector.summarize`. */
+export interface SessionMetrics {
+  sessionId: string;
+  startedAt: string;
+  // Not set by MetricsCollector.summarize.
+  endedAt?: string;
+  totalDurationMs: number;
+  modelCalls: number;
+  toolCalls: number;
+  // Fraction of tool calls that succeeded; 1 when there were no tool calls.
+  toolSuccessRate: number;
+  retryCount: number;
+  totalTokens: number;
+  promptTokens: number;
+  completionTokens: number;
+  estimatedCostUsd: number;
+  // summarize always reports 0 here; the real value is expected to be filled in by the caller.
+  evaluationPassRate: number;
+}
+/**
+ * Accumulates metric entries for a session and reduces them to a
+ * `SessionMetrics` summary. The session clock starts at construction and is
+ * restarted by `reset`.
+ */
+export class MetricsCollector {
+  private entries: MetricEntry[] = [];
+  private sessionStart: number = Date.now();
+  /** Appends one entry. */
+  record(entry: MetricEntry): void {
+    this.entries.push(entry);
+  }
+  /** Returns the recorded entries in insertion order. */
+  getEntries(): readonly MetricEntry[] {
+    return this.entries;
+  }
+  /**
+   * Aggregates the recorded entries.
+   * Token and cost totals come from 'model-call' entries that carry usage;
+   * `evaluationPassRate` is always 0 and `endedAt` is left unset.
+   */
+  summarize(sessionId: string): SessionMetrics {
+    let modelCallCount = 0;
+    let toolCallCount = 0;
+    let toolSuccessCount = 0;
+    let retryCount = 0;
+    let totalTokens = 0;
+    let promptTokens = 0;
+    let completionTokens = 0;
+    let costUsd = 0;
+    // Single pass over the entries, bucketed by type.
+    for (const entry of this.entries) {
+      switch (entry.type) {
+        case 'model-call': {
+          modelCallCount++;
+          const usage = entry.usage;
+          if (usage) {
+            totalTokens += usage.totalTokens;
+            promptTokens += usage.promptTokens;
+            completionTokens += usage.completionTokens;
+            costUsd += usage.estimatedCostUsd ?? 0;
+          }
+          break;
+        }
+        case 'tool-call':
+          toolCallCount++;
+          if (entry.success) toolSuccessCount++;
+          break;
+        case 'retry':
+          retryCount++;
+          break;
+        default:
+          break;
+      }
+    }
+    const startedAt = new Date(this.sessionStart).toISOString();
+    const totalDurationMs = Date.now() - this.sessionStart;
+    return {
+      sessionId,
+      startedAt,
+      totalDurationMs,
+      modelCalls: modelCallCount,
+      toolCalls: toolCallCount,
+      toolSuccessRate: toolCallCount ? toolSuccessCount / toolCallCount : 1,
+      retryCount,
+      totalTokens,
+      promptTokens,
+      completionTokens,
+      estimatedCostUsd: costUsd,
+      evaluationPassRate: 0, // computed externally
+    };
+  }
+  /** Discards all entries and restarts the session clock. */
+  reset(): void {
+    this.entries = [];
+    this.sessionStart = Date.now();
+  }
+}

src/core/policy/index.ts ADDED Viewed

	@@ -0,0 +1,55 @@

+// ─── Policy & Permissions ───────────────────────────────────────────────────
+// Enforces confirmation rules for tool execution based on permission levels.
+import type { PermissionLevel } from '../tools/index.js';
+/**
+ * How strictly tool calls are gated by `PolicyEngine.requiresApproval`:
+ * `auto` gates only 'dangerous' tools; `confirm-writes` gates 'write', 'exec',
+ * 'network' and 'dangerous'; `confirm-network` gates 'network' and 'dangerous';
+ * `manual-all` and `locked-down` gate every tool.
+ */
+export type PolicyMode =
+  | 'auto'
+  | 'confirm-writes'
+  | 'confirm-network'
+  | 'manual-all'
+  | 'locked-down';
+/** Configuration consumed by `PolicyEngine`. */
+export interface PolicyConfig {
+  mode: PolicyMode;
+  // When set, tools not listed here always require approval (they are not blocked outright).
+  allowedTools?: string[];
+  // Tools reported as blocked by PolicyEngine.isBlocked.
+  blockedTools?: string[];
+  // Budget ceilings; a missing or 0 value disables the corresponding check.
+  maxCostUsd?: number;
+  maxTokens?: number;
+}
+/**
+ * Decides whether a tool call needs approval, whether a tool is blocked, and
+ * whether the session is still within its token/cost budget.
+ */
+export class PolicyEngine {
+  constructor(private config: PolicyConfig) {}
+  /**
+   * Returns true when the call must be approved before it runs.
+   * Blocked tools and tools missing from a configured allow-list always
+   * require approval; otherwise the decision depends on the policy mode.
+   */
+  requiresApproval(permission: PermissionLevel, toolName: string): boolean {
+    const { blockedTools, allowedTools, mode } = this.config;
+    const isBlocked = blockedTools?.includes(toolName);
+    if (isBlocked) return true;
+    const offAllowList = allowedTools && !allowedTools.includes(toolName);
+    if (offAllowList) return true;
+    switch (mode) {
+      case 'manual-all':
+      case 'locked-down':
+        return true;
+      case 'auto':
+        return permission === 'dangerous';
+      case 'confirm-network': {
+        const gated = ['network', 'dangerous'];
+        return gated.includes(permission);
+      }
+      case 'confirm-writes': {
+        const gated = ['write', 'exec', 'network', 'dangerous'];
+        return gated.includes(permission);
+      }
+    }
+  }
+  /** Returns true when the tool is on the configured block-list. */
+  isBlocked(toolName: string): boolean {
+    const blocked = this.config.blockedTools;
+    return blocked?.includes(toolName) ?? false;
+  }
+  /**
+   * Compares current usage with the configured ceilings; the token ceiling is
+   * checked first. A ceiling that is unset or 0 is not enforced.
+   */
+  checkBudget(currentUsage: { tokens: number; costUsd: number }): { ok: boolean; reason?: string } {
+    const { maxTokens, maxCostUsd } = this.config;
+    const { tokens, costUsd } = currentUsage;
+    if (maxTokens && tokens >= maxTokens) {
+      return { ok: false, reason: `Token budget exhausted: ${tokens}/${maxTokens}` };
+    }
+    if (maxCostUsd && costUsd >= maxCostUsd) {
+      return { ok: false, reason: `Cost budget exhausted: $${costUsd.toFixed(4)}/$${maxCostUsd}` };
+    }
+    return { ok: true };
+  }
+}

src/core/provider/index.ts ADDED Viewed

	@@ -0,0 +1,99 @@

+// ─── Provider Adapter Interface ─────────────────────────────────────────────
+// Normalizes differences between model vendors while exposing capabilities.
+import { z } from 'zod';
+import type { TokenUsage } from '../events/index.js';
+// ─── Model Capabilities ─────────────────────────────────────────────────────
+export type ModelCapability =
+  | 'streaming'
+  | 'tool-calling'
+  | 'structured-output'
+  | 'vision'
+  | 'long-context'
+  | 'json-mode'
+  | 'reasoning';
+/** Static description of one model offered by a provider. */
+export interface ModelInfo {
+  id: string;
+  name: string;
+  provider: string;
+  contextWindow: number;
+  maxOutputTokens?: number;
+  capabilities: ModelCapability[];
+  // NOTE(review): presumably USD per million tokens — confirm against the provider adapters.
+  costPerMillionInput?: number;
+  costPerMillionOutput?: number;
+}
+// ─── Messages ───────────────────────────────────────────────────────────────
+/** Author of a conversation message. */
+export type MessageRole = 'system' | 'user' | 'assistant' | 'tool';
+/** One entry of the conversation sent to a model. */
+export interface Message {
+  role: MessageRole;
+  content: string;
+  name?: string;
+  // Optional; pairs with ToolCallRequest.id (the runtime sets it on 'tool' messages).
+  toolCallId?: string;
+  // Optional tool calls attached to this message (set on assistant messages by the runtime).
+  toolCalls?: ToolCallRequest[];
+}
+/** A tool invocation requested by the model. */
+export interface ToolCallRequest {
+  id: string;
+  // Name of the requested tool.
+  name: string;
+  arguments: string; // JSON string
+}
+// ─── Invocation ─────────────────────────────────────────────────────────────
+/** Input to a single model call; everything except `model` and `messages` is optional. */
+export interface ModelInvocation {
+  model: string;
+  messages: Message[];
+  tools?: ToolDefinition[];
+  temperature?: number;
+  maxTokens?: number;
+  jsonMode?: boolean;
+  stop?: string[];
+  // Abort signal for cancelling the call.
+  signal?: AbortSignal;
+}
+/** Tool description advertised to the model as part of an invocation. */
+export interface ToolDefinition {
+  name: string;
+  description: string;
+  parameters: Record<string, unknown>; // JSON Schema
+}
+// ─── Result ─────────────────────────────────────────────────────────────────
+/** Normalized outcome of one model call. */
+export interface ModelResult {
+  content: string;
+  // Tool calls requested by the model, if any.
+  toolCalls?: ToolCallRequest[];
+  usage: TokenUsage;
+  finishReason: 'stop' | 'tool_calls' | 'length' | 'error';
+  // Untyped provider payload; optional.
+  rawResponse?: unknown;
+}
+// ─── Streaming Events ───────────────────────────────────────────────────────
+/**
+ * Incremental events yielded by `ProviderAdapter.stream`, discriminated by
+ * `type`. The `finish` variant carries the complete `ModelResult`.
+ */
+export type ModelStreamEvent =
+  | { type: 'text-delta'; text: string }
+  | { type: 'tool-call-start'; id: string; name: string }
+  | { type: 'tool-call-delta'; id: string; argumentsDelta: string }
+  | { type: 'tool-call-end'; id: string }
+  | { type: 'finish'; result: ModelResult };
+// ─── Provider Adapter ───────────────────────────────────────────────────────
+/** Contract every model provider implements so the harness can stay vendor-neutral. */
+export interface ProviderAdapter {
+  id: string;
+  label: string;
+  listModels(): Promise<ModelInfo[]>;
+  // One-shot call that resolves with the complete result.
+  invoke(input: ModelInvocation): Promise<ModelResult>;
+  // Incremental variant of invoke.
+  stream(input: ModelInvocation): AsyncIterable<ModelStreamEvent>;
+  // Capability query; `model` is optional.
+  supports(capability: ModelCapability, model?: string): boolean;
+}
+// ─── Provider Config ────────────────────────────────────────────────────────
+/** Provider construction settings; only `id` is required. */
+export const ProviderConfigSchema = z.object({
+  id: z.string(),
+  apiKey: z.string().optional(),
+  baseUrl: z.string().optional(),
+  defaultModel: z.string().optional(),
+  orgId: z.string().optional(),
+  // String-to-string map; NOTE(review): presumably extra HTTP headers — confirm in the adapters.
+  headers: z.record(z.string()).optional(),
+});
+export type ProviderConfig = z.infer<typeof ProviderConfigSchema>;

src/core/runtime/index.ts ADDED Viewed

	@@ -0,0 +1,279 @@

+// ─── Runtime ────────────────────────────────────────────────────────────────
+// The heart of the harness: manages session state, orchestration, tool execution,
+// budgets, retries, and structured planner/executor/evaluator roles.
+import { nanoid } from 'nanoid';
+import { EventBus, now, type HarnessEvent, type PlanItem, type TokenUsage, type ToolCallRecord } from '../events/index.js';
+import type { ProviderAdapter, ModelInvocation, Message, ToolCallRequest, ModelResult } from '../provider/index.js';
+import { ToolRegistry, type ToolDef, type ToolContext } from '../tools/index.js';
+import { SkillRegistry } from '../skills/index.js';
+import { PolicyEngine } from '../policy/index.js';
+import { MetricsCollector } from '../observability/index.js';
+import { ArtifactStore } from '../artifacts/index.js';
+import { Evaluator, type EvalContext } from '../evaluators/index.js';
+// ─── Session State ──────────────────────────────────────────────────────────
+/** Mutable state of one runtime session; returned by `Runtime.run` and `Runtime.getState`. */
+export interface SessionState {
+  id: string;
+  goal: string;
+  plan: PlanItem[];
+  // Full conversation: system prompt, user goal, assistant turns and tool results.
+  messages: Message[];
+  artifacts: string[]; // artifact IDs
+  provider: string;
+  model: string;
+  skills: string[];
+  // Running totals accumulated from each model result's usage.
+  budgetUsed: { tokens: number; costUsd: number };
+  status: 'running' | 'completed' | 'failed' | 'paused';
+  // Number of failed model calls that were retried so far.
+  retries: number;
+  maxRetries: number;
+  createdAt: string;
+}
+/** Dependencies and limits injected into a `Runtime`. */
+export interface RuntimeConfig {
+  provider: ProviderAdapter;
+  model: string;
+  tools: ToolRegistry;
+  skills: SkillRegistry;
+  policy: PolicyEngine;
+  metrics: MetricsCollector;
+  artifacts: ArtifactStore;
+  evaluator: Evaluator;
+  eventBus: EventBus;
+  systemPrompt: string;
+  // Skill ids whose instructions are appended to the system prompt.
+  activeSkills: string[];
+  // Maximum number of failed model calls to retry; defaults to 3.
+  maxRetries?: number;
+  // Maximum number of model turns per run; defaults to 20.
+  maxTurns?: number;
+  // Reported as `limit` in budget.warning events; enforcement itself is done by `policy`.
+  budgetTokens?: number;
+  // NOTE(review): not read by Runtime in this file — confirm whether it is still needed.
+  budgetCostUsd?: number;
+  // Asked to approve tool calls that the policy says need approval.
+  approvalHandler?: (toolCall: ToolCallRecord) => Promise<boolean>;
+}
+// ─── Runtime ────────────────────────────────────────────────────────────────
+export class Runtime {
+  private state: SessionState;
+  private config: RuntimeConfig;
+  private abortController = new AbortController();
+  private turn = 0;
+  constructor(config: RuntimeConfig, goal: string) {
+    this.config = config;
+    this.state = {
+      id: nanoid(),
+      goal,
+      plan: [],
+      messages: [],
+      artifacts: [],
+      provider: config.provider.id,
+      model: config.model,
+      skills: config.activeSkills,
+      budgetUsed: { tokens: 0, costUsd: 0 },
+      status: 'running',
+      retries: 0,
+      maxRetries: config.maxRetries ?? 3,
+      createdAt: now(),
+    };
+  }
+  get sessionId(): string { return this.state.id; }
+  get status(): string { return this.state.status; }
+  private emit(event: HarnessEvent): void {
+    this.config.eventBus.emit(event);
+  }
+  async run(): Promise<SessionState> {
+    this.emit({ type: 'session.started', sessionId: this.state.id, goal: this.state.goal, timestamp: now() });
+    // Build system message
+    const skillInstructions = this.config.skills.buildInstructions(this.config.activeSkills);
+    const systemMsg: Message = {
+      role: 'system',
+      content: [this.config.systemPrompt, skillInstructions].filter(Boolean).join('\n\n---\n\n'),
+    };
+    this.state.messages = [systemMsg, { role: 'user', content: this.state.goal }];
+    const maxTurns = this.config.maxTurns ?? 20;
+    try {
+      while (this.state.status === 'running' && this.turn < maxTurns) {
+        this.turn++;
+        // Budget check
+        const budgetCheck = this.config.policy.checkBudget(this.state.budgetUsed);
+        if (!budgetCheck.ok) {
+          this.emit({ type: 'budget.warning', usage: { promptTokens: 0, completionTokens: 0, totalTokens: this.state.budgetUsed.tokens, estimatedCostUsd: this.state.budgetUsed.costUsd }, limit: this.config.budgetTokens ?? 0, timestamp: now() });
+          break;
+        }
+        // Invoke model
+        const toolDefs = this.config.tools.listForModel();
+        const invocation: ModelInvocation = {
+          model: this.config.model,
+          messages: this.state.messages,
+          tools: toolDefs,
+          signal: this.abortController.signal,
+        };
+        this.emit({ type: 'model.request.start', provider: this.config.provider.id, model: this.config.model, timestamp: now() });
+        const startMs = Date.now();
+        let result: ModelResult;
+        try {
+          result = await this.config.provider.invoke(invocation);
+        } catch (err) {
+          const errMsg = err instanceof Error ? err.message : String(err);
+          this.emit({ type: 'error', message: `Model call failed: ${errMsg}`, timestamp: now() });
+          if (this.state.retries < this.state.maxRetries) {
+            this.state.retries++;
+            continue;
+          }
+          this.state.status = 'failed';
+          break;
+        }
+        const durationMs = Date.now() - startMs;
+        this.state.budgetUsed.tokens += result.usage.totalTokens;
+        this.state.budgetUsed.costUsd += result.usage.estimatedCostUsd ?? 0;
+        this.emit({ type: 'model.request.end', provider: this.config.provider.id, model: this.config.model, usage: result.usage, durationMs, timestamp: now() });
+        this.config.metrics.record({ timestamp: now(), type: 'model-call', provider: this.config.provider.id, model: this.config.model, durationMs, usage: result.usage, success: true });
+        // Handle assistant response
+        if (result.content) {
+          this.emit({ type: 'model.stream.end', fullText: result.content, timestamp: now() });
+        }
+        this.state.messages.push({
+          role: 'assistant',
+          content: result.content,
+          toolCalls: result.toolCalls,
+        });
+        // If no tool calls, we're done
+        if (!result.toolCalls || result.toolCalls.length === 0) {
+          this.state.status = 'completed';
+          break;
+        }
+        // Execute tool calls
+        for (const tc of result.toolCalls) {
+          const toolResult = await this.executeTool(tc);
+          this.state.messages.push({
+            role: 'tool',
+            content: typeof toolResult === 'string' ? toolResult : JSON.stringify(toolResult),
+            toolCallId: tc.id,
+          });
+        }
+      }
+      // Run evaluation
+      if (this.state.status === 'completed') {
+        this.emit({ type: 'evaluation.started', timestamp: now() });
+        const evalCtx: EvalContext = {
+          sessionId: this.state.id,
+          goal: this.state.goal,
+          artifacts: this.config.artifacts.list().map((a) => ({ path: a.path, content: a.content, type: a.type })),
+          assistantOutput: this.state.messages.filter((m) => m.role === 'assistant').map((m) => m.content).join('\n'),
+          workDir: process.cwd(),
+        };
+        const report = await this.config.evaluator.evaluate(evalCtx);
+        this.emit({ type: 'evaluation.completed', report, timestamp: now() });
+      }
+      this.emit({ type: 'session.completed', sessionId: this.state.id, summary: `Completed in ${this.turn} turns.`, timestamp: now() });
+    } catch (err) {
+      const errMsg = err instanceof Error ? err.message : String(err);
+      this.state.status = 'failed';
+      this.emit({ type: 'session.failed', sessionId: this.state.id, error: errMsg, timestamp: now() });
+    }
+    return this.state;
+  }
+  private async executeTool(tc: ToolCallRequest): Promise<unknown> {
+    const tool = this.config.tools.getByName(tc.name);
+    if (!tool) {
+      const errMsg = `Unknown tool: ${tc.name}`;
+      this.emit({ type: 'tool.failed', toolCallId: tc.id, error: errMsg, durationMs: 0, timestamp: now() });
+      return { error: errMsg };
+    }
+    // Parse input
+    let input: unknown;
+    try {
+      const raw = JSON.parse(tc.arguments);
+      input = tool.inputSchema.parse(raw);
+    } catch (err) {
+      const errMsg = `Invalid tool input: ${err instanceof Error ? err.message : String(err)}`;
+      this.emit({ type: 'tool.failed', toolCallId: tc.id, error: errMsg, durationMs: 0, timestamp: now() });
+      return { error: errMsg };
+    }
+    // Policy check
+    if (this.config.policy.isBlocked(tool.name)) {
+      this.emit({ type: 'tool.denied', toolCallId: tc.id, reason: 'Tool is blocked by policy.', timestamp: now() });
+      return { error: 'Tool blocked by policy.' };
+    }
+    const needsApproval = this.config.policy.requiresApproval(tool.permission, tool.name);
+    const record: ToolCallRecord = {
+      id: tc.id,
+      toolId: tool.id,
+      toolName: tool.name,
+      input,
+      status: needsApproval ? 'awaiting-approval' : 'queued',
+    };
+    this.emit({ type: 'tool.requested', toolCall: record, timestamp: now() });
+    if (needsApproval && this.config.approvalHandler) {
+      const approved = await this.config.approvalHandler(record);
+      if (!approved) {
+        this.emit({ type: 'tool.denied', toolCallId: tc.id, reason: 'User denied.', timestamp: now() });
+        return { error: 'Tool call denied by user.' };
+      }
+      this.emit({ type: 'tool.approved', toolCallId: tc.id, timestamp: now() });
+    }
+    // Execute
+    this.emit({ type: 'tool.started', toolCallId: tc.id, timestamp: now() });
+    const startMs = Date.now();
+    const ctx: ToolContext = {
+      sessionId: this.state.id,
+      workDir: process.cwd(),
+      signal: this.abortController.signal,
+      emit: (msg) => this.emit({ type: 'tool.progress', toolCallId: tc.id, message: msg, timestamp: now() }),
+    };
+    let retries = 0;
+    while (retries <= tool.retries) {
+      try {
+        const result = await Promise.race([
+          tool.execute(input, ctx),
+          new Promise((_, reject) => setTimeout(() => reject(new Error('Tool timeout')), tool.timeout)),
+        ]);
+        const durationMs = Date.now() - startMs;
+        this.emit({ type: 'tool.finished', toolCallId: tc.id, result, durationMs, timestamp: now() });
+        this.config.metrics.record({ timestamp: now(), type: 'tool-call', toolName: tool.name, durationMs, success: true });
+        return result;
+      } catch (err) {
+        retries++;
+        if (retries > tool.retries) {
+          const durationMs = Date.now() - startMs;
+          const errMsg = err instanceof Error ? err.message : String(err);
+          this.emit({ type: 'tool.failed', toolCallId: tc.id, error: errMsg, durationMs, timestamp: now() });
+          this.config.metrics.record({ timestamp: now(), type: 'tool-call', toolName: tool.name, durationMs, success: false, error: errMsg });
+          return { error: errMsg };
+        }
+        this.config.metrics.record({ timestamp: now(), type: 'retry', toolName: tool.name, durationMs: 0, success: false });
+      }
+    }
+    return { error: 'Unexpected tool execution path.' };
+  }
+  /** Flags the session as paused by setting its status to 'paused'. */
+  pause(): void {
+    this.state.status = 'paused';
+  }
+  /** Moves a paused session back to 'running'; any other status is left untouched. */
+  resume(): void {
+    if (this.state.status !== 'paused') return;
+    this.state.status = 'running';
+  }
+  /** Aborts the session's AbortController, then marks the session status as 'failed'. */
+  cancel(): void {
+    this.abortController.abort();
+    this.state.status = 'failed';
+  }
+  /** Returns the live session state object typed as read-only (no copy is made). */
+  getState(): Readonly<SessionState> {
+    return this.state;
+  }
+}

src/core/skills/index.ts ADDED Viewed

	@@ -0,0 +1,39 @@

+// ─── Skills System ──────────────────────────────────────────────────────────
+// Modular instruction packs attachable per task or session.
+/** A named pack of instructions that can be registered in a `SkillRegistry`. */
+export interface SkillModule {
+  /** Key under which the skill is stored and looked up in the registry. */
+  id: string;
+  /** Heading text used when the skill is rendered into combined instructions. */
+  title: string;
+  /** Short human-readable summary of what the skill covers. */
+  description: string;
+  /** Instruction text emitted under the skill's heading. */
+  instructions: string;
+  /** Optional tool identifiers associated with the skill (presumably tool names; confirm against callers). */
+  suggestedTools?: string[];
+  /** Optional free-form labels. */
+  tags?: string[];
+}
+/**
+ * In-memory catalogue of skill modules keyed by `id`.
+ * Registering a skill under an id that already exists replaces the earlier entry.
+ */
+export class SkillRegistry {
+  private skills = new Map<string, SkillModule>();
+  /** Stores `skill` under its own `id`. */
+  register(skill: SkillModule): void {
+    const { id } = skill;
+    this.skills.set(id, skill);
+  }
+  /** Looks up a skill by id; yields `undefined` when nothing is registered under it. */
+  get(id: string): SkillModule | undefined {
+    return this.skills.get(id);
+  }
+  /** Returns a fresh array of every registered skill, in registration order. */
+  list(): SkillModule[] {
+    return Array.from(this.skills.values());
+  }
+  /** Maps ids to skills in the given order, silently skipping ids that are not registered. */
+  resolve(ids: string[]): SkillModule[] {
+    const found: SkillModule[] = [];
+    for (const id of ids) {
+      const skill = this.get(id);
+      if (skill) found.push(skill);
+    }
+    return found;
+  }
+  /**
+   * Renders the resolved skills as Markdown sections separated by horizontal rules.
+   * Returns an empty string when none of the ids resolve.
+   */
+  buildInstructions(ids: string[]): string {
+    const sections: string[] = [];
+    for (const skill of this.resolve(ids)) {
+      sections.push(`## Skill: ${skill.title}\n\n${skill.instructions}`);
+    }
+    return sections.length === 0 ? '' : sections.join('\n\n---\n\n');
+  }
+}

src/core/tools/index.ts ADDED Viewed

	@@ -0,0 +1,113 @@

+// ─── Tool Registry ──────────────────────────────────────────────────────────
+// First-class subsystem for typed, permissioned, observable tool execution.
+import { z, ZodSchema } from 'zod';
+// ─── Permission Levels ──────────────────────────────────────────────────────
+/** Permission category a tool declares through `ToolDef.permission`. */
+export type PermissionLevel = 'read' | 'write' | 'exec' | 'network' | 'dangerous';
+// ─── Tool Status ────────────────────────────────────────────────────────────
+/** Status labels for a single tool call. */
+export type ToolStatus =
+  | 'queued'
+  | 'awaiting-approval'
+  | 'running'
+  | 'streaming'
+  | 'success'
+  | 'failed'
+  | 'denied'
+  | 'cancelled';
+// ─── Side Effect Classification ─────────────────────────────────────────────
+/** Kind of side effect a tool declares through `ToolDef.sideEffect`. */
+export type SideEffect = 'none' | 'filesystem' | 'network' | 'process' | 'mixed';
+// ─── Tool Definition ────────────────────────────────────────────────────────
+/**
+ * Declarative description of a tool plus its implementation.
+ * `TInput` / `TOutput` are the value types described by `inputSchema` / `outputSchema`.
+ */
+export interface ToolDef<TInput = unknown, TOutput = unknown> {
+  /** Registry key; `ToolRegistry.register` rejects a second tool with the same id. */
+  id: string;
+  /** Name advertised to the model and matched by `ToolRegistry.getByName`. */
+  name: string;
+  /** Description advertised to the model alongside `name`. */
+  description: string;
+  /** Zod schema for the input; converted into the `parameters` object sent to the model. */
+  inputSchema: ZodSchema<TInput>;
+  /** Zod schema describing the value `execute` resolves with. */
+  outputSchema: ZodSchema<TOutput>;
+  /** Permission category of the tool. */
+  permission: PermissionLevel;
+  /** Declared side-effect classification. */
+  sideEffect: SideEffect;
+  /** Time budget for a single execution attempt. */
+  timeout: number; // ms
+  /** Number of additional attempts after a failed one. */
+  retries: number;
+  /** Optional free-form labels. */
+  tags?: string[];
+  /** Optional display hints (presumably consumed by the CLI renderers; confirm). */
+  renderer?: {
+    icon?: string;
+    color?: string;
+    compact?: boolean;
+  };
+  /** Runs the tool with input of type `TInput` and a per-call context. */
+  execute(input: TInput, ctx: ToolContext): Promise<TOutput>;
+}
+// ─── Tool Context ───────────────────────────────────────────────────────────
+/** Per-call context handed to `ToolDef.execute`. */
+export interface ToolContext {
+  /** Identifier of the session the call belongs to. */
+  sessionId: string;
+  /** Base directory for the call; the filesystem tools resolve relative paths against it. */
+  workDir: string;
+  /** Optional abort signal a tool can observe for cancellation. */
+  signal?: AbortSignal;
+  /** Reports a progress message for the running call. */
+  emit(message: string): void; // for progress updates
+}
+// ─── Tool Registry ──────────────────────────────────────────────────────────
+/**
+ * Registry of tool definitions keyed by `ToolDef.id`, plus a minimal converter that turns a
+ * tool's Zod input schema into the JSON-Schema-like `parameters` object handed to model providers.
+ *
+ * The converter inspects Zod v3 internals (`_def.typeName`, `_def.shape()`, `_def.innerType`,
+ * `_def.type`, `_def.values`). It covers strings, numbers, booleans, enums, arrays (with `items`),
+ * nested objects, and the `optional` / `nullable` / `default` wrappers. Anything else falls back
+ * to `'string'`. In production, prefer the zod-to-json-schema package.
+ */
+export class ToolRegistry {
+  /** Zod wrapper types that decorate an inner schema without changing its JSON type. */
+  private static readonly WRAPPER_TYPES: readonly string[] = ['ZodOptional', 'ZodNullable', 'ZodDefault'];
+  private tools = new Map<string, ToolDef<any, any>>();
+  /**
+   * Adds a tool to the registry.
+   * @throws Error when a tool with the same `id` is already registered.
+   */
+  register<TI, TO>(tool: ToolDef<TI, TO>): void {
+    if (this.tools.has(tool.id)) {
+      throw new Error(`Tool already registered: ${tool.id}`);
+    }
+    this.tools.set(tool.id, tool);
+  }
+  /** Looks up a tool by registry id. */
+  get(id: string): ToolDef | undefined {
+    return this.tools.get(id);
+  }
+  /** Looks up a tool by the name advertised to the model; returns the first match. */
+  getByName(name: string): ToolDef | undefined {
+    for (const tool of this.tools.values()) {
+      if (tool.name === name) return tool;
+    }
+    return undefined;
+  }
+  /** Returns a fresh array of all registered tools, in registration order. */
+  list(): ToolDef[] {
+    return [...this.tools.values()];
+  }
+  /** Describes every tool in the provider-neutral `{ name, description, parameters }` shape. */
+  listForModel(): Array<{ name: string; description: string; parameters: Record<string, unknown> }> {
+    return this.list().map((t) => ({
+      name: t.name,
+      description: t.description,
+      parameters: this.zodToJsonSchema(t.inputSchema),
+    }));
+  }
+  /** Converts a top-level schema; non-object schemas yield an empty object schema. */
+  private zodToJsonSchema(schema: ZodSchema): Record<string, unknown> {
+    const def = (schema as any)?._def;
+    if (def?.typeName === 'ZodObject') return this.zodObjectToJson(def);
+    return { type: 'object', properties: {} };
+  }
+  /** Converts a ZodObject definition into `{ type: 'object', properties, required }`. */
+  private zodObjectToJson(def: any): Record<string, unknown> {
+    const shape: Record<string, unknown> = (typeof def.shape === 'function' ? def.shape() : def.shape) ?? {};
+    const properties: Record<string, unknown> = {};
+    const required: string[] = [];
+    for (const [key, value] of Object.entries(shape)) {
+      const fieldDef = (value as any)?._def;
+      properties[key] = this.zodFieldToJson(fieldDef);
+      if (!this.isOptionalField(fieldDef)) required.push(key);
+    }
+    return { type: 'object', properties, required };
+  }
+  /** Converts one field definition, adding `items`, `enum`, or nested `properties` where needed. */
+  private zodFieldToJson(def: any): Record<string, unknown> {
+    const inner = this.unwrapDef(def);
+    const out: Record<string, unknown> = {
+      type: this.zodTypeToJson(inner),
+      // `.describe()` may sit on the wrapper or on the wrapped schema.
+      description: def?.description || inner?.description || '',
+    };
+    if (inner?.typeName === 'ZodArray') {
+      // Providers reject array parameters that do not declare their element schema.
+      out.items = this.zodFieldToJson(inner.type?._def);
+    } else if (inner?.typeName === 'ZodEnum' && Array.isArray(inner.values)) {
+      out.enum = [...inner.values];
+    } else if (inner?.typeName === 'ZodObject') {
+      const nested = this.zodObjectToJson(inner);
+      out.properties = nested.properties;
+      out.required = nested.required;
+    }
+    return out;
+  }
+  /** Maps a (possibly wrapped) Zod definition to a JSON Schema primitive type name. */
+  private zodTypeToJson(def: any): string {
+    switch (this.unwrapDef(def)?.typeName) {
+      case 'ZodString': return 'string';
+      case 'ZodNumber': return 'number';
+      case 'ZodBoolean': return 'boolean';
+      case 'ZodArray': return 'array';
+      case 'ZodObject': return 'object';
+      case 'ZodEnum': return 'string';
+      default: return 'string';
+    }
+  }
+  /** Strips optional / nullable / default wrappers and returns the innermost definition. */
+  private unwrapDef(def: any): any {
+    let current = def;
+    while (ToolRegistry.WRAPPER_TYPES.includes(current?.typeName) && current.innerType?._def) {
+      current = current.innerType._def;
+    }
+    return current;
+  }
+  /** A field may be omitted when any wrapper around it is `optional` or `default`. */
+  private isOptionalField(def: any): boolean {
+    let current = def;
+    while (current) {
+      if (current.typeName === 'ZodOptional' || current.typeName === 'ZodDefault') return true;
+      if (current.typeName !== 'ZodNullable') return false;
+      current = current.innerType?._def;
+    }
+    return false;
+  }
+}

src/providers/anthropic/index.ts ADDED Viewed

	@@ -0,0 +1,159 @@

+// ─── Anthropic Provider Adapter ──────────────────────────────────────────────
+import type {
+  ProviderAdapter, ProviderConfig, ModelInfo, ModelCapability,
+  ModelInvocation, ModelResult, ModelStreamEvent, ToolCallRequest,
+} from '../../core/provider/index.js';
+import type { TokenUsage } from '../../core/events/index.js';
+/**
+ * Provider adapter for the Anthropic Messages API (`POST {baseUrl}/v1/messages`).
+ *
+ * Translates the provider-neutral `ModelInvocation` into an Anthropic request body and maps
+ * responses (blocking or server-sent-event streams) back to `ModelResult` / `ModelStreamEvent`.
+ */
+export class AnthropicProvider implements ProviderAdapter {
+  id = 'anthropic';
+  label = 'Anthropic';
+  private apiKey: string;
+  private baseUrl: string;
+  /** Falls back to `ANTHROPIC_API_KEY` and the public API host when the config omits them. */
+  constructor(config: ProviderConfig) {
+    this.apiKey = config.apiKey ?? process.env['ANTHROPIC_API_KEY'] ?? '';
+    this.baseUrl = config.baseUrl ?? 'https://api.anthropic.com';
+  }
+  /** Returns a static, hard-coded model catalogue (no network call). */
+  async listModels(): Promise<ModelInfo[]> {
+    return [
+      { id: 'claude-sonnet-4-20250514', name: 'Claude Sonnet 4', provider: 'anthropic', contextWindow: 200000, maxOutputTokens: 64000, capabilities: ['streaming', 'tool-calling', 'vision', 'long-context', 'reasoning'], costPerMillionInput: 3, costPerMillionOutput: 15 },
+      { id: 'claude-opus-4-20250514', name: 'Claude Opus 4', provider: 'anthropic', contextWindow: 200000, maxOutputTokens: 32000, capabilities: ['streaming', 'tool-calling', 'vision', 'long-context', 'reasoning'], costPerMillionInput: 15, costPerMillionOutput: 75 },
+      { id: 'claude-3-5-haiku-20241022', name: 'Claude 3.5 Haiku', provider: 'anthropic', contextWindow: 200000, maxOutputTokens: 8192, capabilities: ['streaming', 'tool-calling', 'vision'], costPerMillionInput: 0.8, costPerMillionOutput: 4 },
+    ];
+  }
+  /** Provider-level capability check. */
+  supports(capability: ModelCapability): boolean {
+    return ['streaming', 'tool-calling', 'vision', 'long-context'].includes(capability);
+  }
+  /**
+   * Performs one blocking completion.
+   * @throws Error with the HTTP status and response text when the API answers with a non-2xx status.
+   */
+  async invoke(input: ModelInvocation): Promise<ModelResult> {
+    const res = await this.post(this.buildBody(input), input.signal);
+    const data = await res.json() as any;
+    return this.parseResponse(data);
+  }
+  /**
+   * Streams a completion as incremental events, ending with a single `finish` event that is
+   * emitted when the API sends `message_delta`.
+   * @throws Error on a non-2xx status or when the response carries no body.
+   */
+  async *stream(input: ModelInvocation): AsyncIterable<ModelStreamEvent> {
+    const res = await this.post({ ...this.buildBody(input), stream: true }, input.signal);
+    let fullText = '';
+    const toolCalls: Array<{ id: string; name: string; args: string }> = [];
+    let currentToolIdx = -1;
+    // Input tokens are reported once in `message_start`; `message_delta` carries the cumulative
+    // output token count, so both have to be remembered across events.
+    let promptTokens = 0;
+    let completionTokens = 0;
+    for await (const payload of this.readSseData(res)) {
+      const event = JSON.parse(payload) as any;
+      if (event.type === 'message_start') {
+        promptTokens = event.message?.usage?.input_tokens ?? promptTokens;
+        completionTokens = event.message?.usage?.output_tokens ?? completionTokens;
+      } else if (event.type === 'content_block_start' && event.content_block?.type === 'tool_use') {
+        const tc = { id: event.content_block.id, name: event.content_block.name, args: '' };
+        toolCalls.push(tc);
+        currentToolIdx = toolCalls.length - 1;
+        yield { type: 'tool-call-start', id: tc.id, name: tc.name };
+      } else if (event.type === 'content_block_delta') {
+        if (event.delta?.type === 'text_delta') {
+          fullText += event.delta.text;
+          yield { type: 'text-delta', text: event.delta.text };
+        } else if (event.delta?.type === 'input_json_delta' && currentToolIdx >= 0) {
+          toolCalls[currentToolIdx]!.args += event.delta.partial_json;
+          yield { type: 'tool-call-delta', id: toolCalls[currentToolIdx]!.id, argumentsDelta: event.delta.partial_json };
+        }
+      } else if (event.type === 'content_block_stop' && currentToolIdx >= 0) {
+        yield { type: 'tool-call-end', id: toolCalls[currentToolIdx]!.id };
+        currentToolIdx = -1;
+      } else if (event.type === 'message_delta') {
+        promptTokens = event.usage?.input_tokens ?? promptTokens;
+        completionTokens = event.usage?.output_tokens ?? completionTokens;
+        const usage: TokenUsage = {
+          promptTokens,
+          completionTokens,
+          totalTokens: promptTokens + completionTokens,
+        };
+        const tcReqs: ToolCallRequest[] = toolCalls.map((t) => ({ id: t.id, name: t.name, arguments: t.args }));
+        yield {
+          type: 'finish',
+          result: {
+            content: fullText,
+            toolCalls: tcReqs.length ? tcReqs : undefined,
+            usage,
+            // Same mapping as the non-streaming path.
+            finishReason: tcReqs.length ? 'tool_calls' : event.delta?.stop_reason === 'max_tokens' ? 'length' : 'stop',
+          },
+        };
+      }
+    }
+  }
+  /** Sends the JSON body to the Messages endpoint and rejects non-2xx answers. */
+  private async post(body: Record<string, unknown>, signal: ModelInvocation['signal']): Promise<Response> {
+    const res = await fetch(`${this.baseUrl}/v1/messages`, {
+      method: 'POST',
+      headers: {
+        'Content-Type': 'application/json',
+        'x-api-key': this.apiKey,
+        'anthropic-version': '2023-06-01',
+      },
+      body: JSON.stringify(body),
+      signal,
+    });
+    if (!res.ok) throw new Error(`Anthropic API error: ${res.status} ${await res.text()}`);
+    return res;
+  }
+  /** Yields the payload of every `data:` line of a server-sent-event response body. */
+  private async *readSseData(res: Response): AsyncGenerator<string> {
+    if (!res.body) throw new Error('Anthropic API error: response has no body');
+    const reader = res.body.getReader();
+    const decoder = new TextDecoder();
+    let buffer = '';
+    try {
+      while (true) {
+        const { done, value } = await reader.read();
+        if (done) break;
+        buffer += decoder.decode(value, { stream: true });
+        const lines = buffer.split('\n');
+        buffer = lines.pop() ?? '';
+        for (const line of lines) {
+          const payload = this.sseDataPayload(line);
+          if (payload) yield payload;
+        }
+      }
+      // Flush a final line that was not newline-terminated.
+      buffer += decoder.decode();
+      const last = this.sseDataPayload(buffer);
+      if (last) yield last;
+    } finally {
+      // Runs on normal completion, on errors, and when the consumer stops iterating early.
+      reader.releaseLock();
+    }
+  }
+  /** Extracts the payload of a `data:` line (tolerating CRLF); other lines yield `undefined`. */
+  private sseDataPayload(line: string): string | undefined {
+    const trimmed = line.trim();
+    if (!trimmed.startsWith('data:')) return undefined;
+    return trimmed.slice(5).trim() || undefined;
+  }
+  /** Parses accumulated tool-call arguments; an empty string (argument-less call) becomes `{}`. */
+  private parseToolInput(raw: string | undefined): unknown {
+    if (!raw || !raw.trim()) return {};
+    return JSON.parse(raw);
+  }
+  /** Builds the Messages API request body from the provider-neutral invocation. */
+  private buildBody(input: ModelInvocation): Record<string, unknown> {
+    // Anthropic takes the system prompt as a top-level field; several system messages are joined.
+    const systemText = input.messages
+      .filter((m) => m.role === 'system')
+      .map((m) => m.content)
+      .join('\n\n');
+    const nonSystemMsgs = input.messages.filter((m) => m.role !== 'system');
+    const messages = nonSystemMsgs.map((m) => {
+      if (m.role === 'tool') {
+        // Tool results travel as `tool_result` blocks inside a user turn.
+        return { role: 'user', content: [{ type: 'tool_result', tool_use_id: m.toolCallId, content: m.content }] };
+      }
+      if (m.role === 'assistant' && m.toolCalls?.length) {
+        const content: any[] = [];
+        if (m.content) content.push({ type: 'text', text: m.content });
+        for (const tc of m.toolCalls) {
+          content.push({ type: 'tool_use', id: tc.id, name: tc.name, input: this.parseToolInput(tc.arguments) });
+        }
+        return { role: 'assistant', content };
+      }
+      return { role: m.role === 'user' ? 'user' : 'assistant', content: m.content };
+    });
+    const body: Record<string, unknown> = { model: input.model, messages, max_tokens: input.maxTokens ?? 8192 };
+    if (systemText) body.system = systemText;
+    if (input.tools?.length) {
+      body.tools = input.tools.map((t) => ({ name: t.name, description: t.description, input_schema: t.parameters }));
+    }
+    if (input.temperature !== undefined) body.temperature = input.temperature;
+    return body;
+  }
+  /** Maps a non-streaming Messages API response to a `ModelResult`. */
+  private parseResponse(data: any): ModelResult {
+    let content = '';
+    const toolCalls: ToolCallRequest[] = [];
+    for (const block of data.content ?? []) {
+      if (block.type === 'text') content += block.text;
+      if (block.type === 'tool_use') toolCalls.push({ id: block.id, name: block.name, arguments: JSON.stringify(block.input) });
+    }
+    const usage: TokenUsage = {
+      promptTokens: data.usage?.input_tokens ?? 0,
+      completionTokens: data.usage?.output_tokens ?? 0,
+      totalTokens: (data.usage?.input_tokens ?? 0) + (data.usage?.output_tokens ?? 0),
+    };
+    return {
+      content,
+      toolCalls: toolCalls.length ? toolCalls : undefined,
+      usage,
+      finishReason: toolCalls.length ? 'tool_calls' : data.stop_reason === 'max_tokens' ? 'length' : 'stop',
+    };
+  }
+}

src/providers/gemini/index.ts ADDED Viewed

	@@ -0,0 +1,130 @@

+// ─── Gemini Provider Adapter ────────────────────────────────────────────────
+import type {
+  ProviderAdapter, ProviderConfig, ModelInfo, ModelCapability,
+  ModelInvocation, ModelResult, ModelStreamEvent, ToolCallRequest,
+} from '../../core/provider/index.js';
+import type { TokenUsage } from '../../core/events/index.js';
+/**
+ * Provider adapter for the Google Gemini `generateContent` / `streamGenerateContent` REST API.
+ *
+ * Translates the provider-neutral `ModelInvocation` into a Gemini request body and maps
+ * responses (blocking or server-sent-event streams) back to `ModelResult` / `ModelStreamEvent`.
+ * Gemini does not assign ids to function calls, so ids are synthesized locally.
+ */
+export class GeminiProvider implements ProviderAdapter {
+  id = 'gemini';
+  label = 'Google Gemini';
+  private apiKey: string;
+  private baseUrl: string;
+  /** Counter that keeps synthesized tool-call ids unique within the same millisecond. */
+  private toolCallSeq = 0;
+  /** Falls back to `GEMINI_API_KEY` and the public v1beta endpoint when the config omits them. */
+  constructor(config: ProviderConfig) {
+    this.apiKey = config.apiKey ?? process.env['GEMINI_API_KEY'] ?? '';
+    this.baseUrl = config.baseUrl ?? 'https://generativelanguage.googleapis.com/v1beta';
+  }
+  /** Returns a static, hard-coded model catalogue (no network call). */
+  async listModels(): Promise<ModelInfo[]> {
+    return [
+      { id: 'gemini-2.5-pro', name: 'Gemini 2.5 Pro', provider: 'gemini', contextWindow: 1000000, maxOutputTokens: 65536, capabilities: ['streaming', 'tool-calling', 'vision', 'long-context', 'reasoning'], costPerMillionInput: 1.25, costPerMillionOutput: 10 },
+      { id: 'gemini-2.5-flash', name: 'Gemini 2.5 Flash', provider: 'gemini', contextWindow: 1000000, maxOutputTokens: 65536, capabilities: ['streaming', 'tool-calling', 'vision', 'long-context'], costPerMillionInput: 0.15, costPerMillionOutput: 0.6 },
+    ];
+  }
+  /** Provider-level capability check. */
+  supports(capability: ModelCapability): boolean {
+    return ['streaming', 'tool-calling', 'vision', 'long-context'].includes(capability);
+  }
+  /**
+   * Performs one blocking completion.
+   * @throws Error with the HTTP status and response text when the API answers with a non-2xx status.
+   */
+  async invoke(input: ModelInvocation): Promise<ModelResult> {
+    const url = `${this.baseUrl}/models/${input.model}:generateContent?key=${this.apiKey}`;
+    const res = await this.post(url, this.buildBody(input), input.signal);
+    const data = await res.json() as any;
+    return this.parseResponse(data);
+  }
+  /**
+   * Streams a completion. Text and function calls are forwarded as they arrive; one `finish`
+   * event carrying the full text, every tool call, and the last reported usage closes the stream.
+   * @throws Error on a non-2xx status or when the response carries no body.
+   */
+  async *stream(input: ModelInvocation): AsyncIterable<ModelStreamEvent> {
+    const url = `${this.baseUrl}/models/${input.model}:streamGenerateContent?key=${this.apiKey}&alt=sse`;
+    const res = await this.post(url, this.buildBody(input), input.signal);
+    let fullText = '';
+    const toolCalls: ToolCallRequest[] = [];
+    let usage: TokenUsage = { promptTokens: 0, completionTokens: 0, totalTokens: 0 };
+    let stopReason: string | undefined;
+    for await (const payload of this.readSseData(res)) {
+      const chunk = JSON.parse(payload) as any;
+      // Later chunks supersede earlier usage figures.
+      if (chunk.usageMetadata) usage = this.toUsage(chunk.usageMetadata);
+      const candidate = chunk.candidates?.[0];
+      if (candidate?.finishReason) stopReason = candidate.finishReason;
+      for (const part of candidate?.content?.parts ?? []) {
+        if (part.text) {
+          fullText += part.text;
+          yield { type: 'text-delta', text: part.text };
+        }
+        if (part.functionCall) {
+          const id = this.nextToolCallId();
+          const args = JSON.stringify(part.functionCall.args ?? {});
+          toolCalls.push({ id, name: part.functionCall.name, arguments: args });
+          yield { type: 'tool-call-start', id, name: part.functionCall.name };
+          yield { type: 'tool-call-delta', id, argumentsDelta: args };
+          yield { type: 'tool-call-end', id };
+        }
+      }
+    }
+    yield {
+      type: 'finish',
+      result: {
+        content: fullText,
+        toolCalls: toolCalls.length ? toolCalls : undefined,
+        usage,
+        finishReason: toolCalls.length ? 'tool_calls' : stopReason === 'MAX_TOKENS' ? 'length' : 'stop',
+      },
+    };
+  }
+  /** Sends the JSON body to `url` and rejects non-2xx answers. */
+  private async post(url: string, body: Record<string, unknown>, signal: ModelInvocation['signal']): Promise<Response> {
+    const res = await fetch(url, {
+      method: 'POST',
+      headers: { 'Content-Type': 'application/json' },
+      body: JSON.stringify(body),
+      signal,
+    });
+    if (!res.ok) throw new Error(`Gemini API error: ${res.status} ${await res.text()}`);
+    return res;
+  }
+  /** Yields the payload of every `data:` line of a server-sent-event response body. */
+  private async *readSseData(res: Response): AsyncGenerator<string> {
+    if (!res.body) throw new Error('Gemini API error: response has no body');
+    const reader = res.body.getReader();
+    const decoder = new TextDecoder();
+    let buffer = '';
+    try {
+      while (true) {
+        const { done, value } = await reader.read();
+        if (done) break;
+        buffer += decoder.decode(value, { stream: true });
+        const lines = buffer.split('\n');
+        buffer = lines.pop() ?? '';
+        for (const line of lines) {
+          const payload = this.sseDataPayload(line);
+          if (payload) yield payload;
+        }
+      }
+      // Flush a final line that was not newline-terminated.
+      buffer += decoder.decode();
+      const last = this.sseDataPayload(buffer);
+      if (last) yield last;
+    } finally {
+      // Runs on normal completion, on errors, and when the consumer stops iterating early.
+      reader.releaseLock();
+    }
+  }
+  /** Extracts the payload of a `data:` line (tolerating CRLF); other lines yield `undefined`. */
+  private sseDataPayload(line: string): string | undefined {
+    const trimmed = line.trim();
+    if (!trimmed.startsWith('data:')) return undefined;
+    return trimmed.slice(5).trim() || undefined;
+  }
+  /** Synthesizes a tool-call id that stays unique even for calls created in the same millisecond. */
+  private nextToolCallId(): string {
+    this.toolCallSeq += 1;
+    return `gemini-tc-${Date.now()}-${this.toolCallSeq}`;
+  }
+  /** Parses stored tool-call arguments; an empty string (argument-less call) becomes `{}`. */
+  private parseToolArguments(raw: string | undefined): unknown {
+    if (!raw || !raw.trim()) return {};
+    return JSON.parse(raw);
+  }
+  /** Maps Gemini `usageMetadata` to the provider-neutral token usage shape. */
+  private toUsage(meta: any): TokenUsage {
+    return {
+      promptTokens: meta?.promptTokenCount ?? 0,
+      completionTokens: meta?.candidatesTokenCount ?? 0,
+      totalTokens: meta?.totalTokenCount ?? 0,
+    };
+  }
+  /** Builds the `generateContent` request body from the provider-neutral invocation. */
+  private buildBody(input: ModelInvocation): Record<string, unknown> {
+    const contents: any[] = [];
+    let systemInstruction: string | undefined;
+    for (const m of input.messages) {
+      if (m.role === 'system') { systemInstruction = m.content; continue; }
+      if (m.role === 'tool') {
+        // The API documents only the 'user' and 'model' roles; function responses go in a user turn.
+        contents.push({ role: 'user', parts: [{ functionResponse: { name: m.name ?? 'tool', response: { result: m.content } } }] });
+      } else if (m.role === 'assistant' && m.toolCalls?.length) {
+        // Replay earlier function calls so that following function responses have a matching call.
+        const parts: any[] = [];
+        if (m.content) parts.push({ text: m.content });
+        for (const tc of m.toolCalls) {
+          parts.push({ functionCall: { name: tc.name, args: this.parseToolArguments(tc.arguments) } });
+        }
+        contents.push({ role: 'model', parts });
+      } else {
+        contents.push({ role: m.role === 'assistant' ? 'model' : 'user', parts: [{ text: m.content }] });
+      }
+    }
+    const body: Record<string, unknown> = { contents };
+    if (systemInstruction) body.systemInstruction = { parts: [{ text: systemInstruction }] };
+    if (input.tools?.length) {
+      body.tools = [{ functionDeclarations: input.tools.map((t) => ({ name: t.name, description: t.description, parameters: t.parameters })) }];
+    }
+    const generationConfig: Record<string, unknown> = {};
+    if (input.temperature !== undefined) {
+      generationConfig.temperature = input.temperature;
+      // Legacy behavior: an explicit temperature also pins the output budget (default 8192).
+      generationConfig.maxOutputTokens = input.maxTokens ?? 8192;
+    } else if (input.maxTokens !== undefined) {
+      // An explicit token budget is honored even when no temperature is given.
+      generationConfig.maxOutputTokens = input.maxTokens;
+    }
+    if (Object.keys(generationConfig).length) body.generationConfig = generationConfig;
+    return body;
+  }
+  /** Maps a non-streaming `generateContent` response to a `ModelResult`. */
+  private parseResponse(data: any): ModelResult {
+    let content = '';
+    const toolCalls: ToolCallRequest[] = [];
+    const candidate = data.candidates?.[0];
+    for (const part of candidate?.content?.parts ?? []) {
+      if (part.text) content += part.text;
+      if (part.functionCall) {
+        toolCalls.push({ id: this.nextToolCallId(), name: part.functionCall.name, arguments: JSON.stringify(part.functionCall.args ?? {}) });
+      }
+    }
+    const usage = this.toUsage(data.usageMetadata ?? {});
+    return {
+      content,
+      toolCalls: toolCalls.length ? toolCalls : undefined,
+      usage,
+      finishReason: toolCalls.length ? 'tool_calls' : candidate?.finishReason === 'MAX_TOKENS' ? 'length' : 'stop',
+    };
+  }
+}

src/providers/index.ts ADDED Viewed

	@@ -0,0 +1,5 @@

+// ─── Providers barrel export ─────────────────────────────────────────────────
+export { OpenAIProvider } from './openai/index.js';
+export { AnthropicProvider } from './anthropic/index.js';
+export { GeminiProvider } from './gemini/index.js';
+export { OpenAICompatibleProvider, OpenRouterProvider } from './openrouter/index.js';

src/providers/openai/index.ts ADDED Viewed

	@@ -0,0 +1,148 @@

+// ─── OpenAI Provider Adapter ────────────────────────────────────────────────
+import type {
+  ProviderAdapter, ProviderConfig, ModelInfo, ModelCapability,
+  ModelInvocation, ModelResult, ModelStreamEvent, ToolCallRequest,
+} from '../../core/provider/index.js';
+import type { TokenUsage } from '../../core/events/index.js';
+/**
+ * Provider adapter for the OpenAI Chat Completions API (`POST {baseUrl}/chat/completions`).
+ *
+ * Translates the provider-neutral `ModelInvocation` into a chat-completions request and maps
+ * responses (blocking or server-sent-event streams) back to `ModelResult` / `ModelStreamEvent`.
+ */
+export class OpenAIProvider implements ProviderAdapter {
+  id = 'openai';
+  label = 'OpenAI';
+  private apiKey: string;
+  private baseUrl: string;
+  /** Falls back to `OPENAI_API_KEY` and the public API endpoint when the config omits them. */
+  constructor(config: ProviderConfig) {
+    this.apiKey = config.apiKey ?? process.env['OPENAI_API_KEY'] ?? '';
+    this.baseUrl = config.baseUrl ?? 'https://api.openai.com/v1';
+  }
+  /** Returns a static, hard-coded model catalogue (no network call). */
+  async listModels(): Promise<ModelInfo[]> {
+    return [
+      { id: 'gpt-4o', name: 'GPT-4o', provider: 'openai', contextWindow: 128000, maxOutputTokens: 16384, capabilities: ['streaming', 'tool-calling', 'vision', 'json-mode', 'structured-output'], costPerMillionInput: 2.5, costPerMillionOutput: 10 },
+      { id: 'gpt-4o-mini', name: 'GPT-4o Mini', provider: 'openai', contextWindow: 128000, maxOutputTokens: 16384, capabilities: ['streaming', 'tool-calling', 'json-mode', 'structured-output'], costPerMillionInput: 0.15, costPerMillionOutput: 0.6 },
+      { id: 'o1', name: 'o1', provider: 'openai', contextWindow: 200000, maxOutputTokens: 100000, capabilities: ['streaming', 'tool-calling', 'reasoning'], costPerMillionInput: 15, costPerMillionOutput: 60 },
+    ];
+  }
+  /** Provider-level capability check. */
+  supports(capability: ModelCapability): boolean {
+    return ['streaming', 'tool-calling', 'vision', 'json-mode', 'structured-output'].includes(capability);
+  }
+  /**
+   * Performs one blocking completion.
+   * @throws Error on a non-2xx status or when the response contains no choices.
+   */
+  async invoke(input: ModelInvocation): Promise<ModelResult> {
+    const res = await this.post(this.buildBody(input, false), input.signal);
+    const data = await res.json() as any;
+    return this.parseResponse(data);
+  }
+  /**
+   * Streams a completion. Text and tool-call fragments are forwarded as they arrive. The single
+   * `finish` event is emitted once the stream ends, because the usage chunk requested via
+   * `stream_options.include_usage` arrives after the chunk that carries `finish_reason`.
+   * No `finish` event is emitted when the stream ends without any `finish_reason`.
+   * @throws Error on a non-2xx status or when the response carries no body.
+   */
+  async *stream(input: ModelInvocation): AsyncIterable<ModelStreamEvent> {
+    const res = await this.post(this.buildBody(input, true), input.signal);
+    let fullText = '';
+    const toolCalls = new Map<number, { id: string; name: string; args: string }>();
+    let finishReason: string | undefined;
+    let usage: TokenUsage = { promptTokens: 0, completionTokens: 0, totalTokens: 0 };
+    for await (const payload of this.readSseData(res)) {
+      if (payload === '[DONE]') break;
+      const chunk = JSON.parse(payload) as any;
+      // The usage-only chunk has an empty `choices` array, so read usage before looking at choices.
+      if (chunk.usage) {
+        usage = {
+          promptTokens: chunk.usage.prompt_tokens ?? 0,
+          completionTokens: chunk.usage.completion_tokens ?? 0,
+          totalTokens: chunk.usage.total_tokens ?? 0,
+        };
+      }
+      const choice = chunk.choices?.[0];
+      if (!choice) continue;
+      const delta = choice.delta;
+      if (delta?.content) {
+        fullText += delta.content;
+        yield { type: 'text-delta', text: delta.content };
+      }
+      for (const tc of delta?.tool_calls ?? []) {
+        const idx = typeof tc.index === 'number' ? tc.index : 0;
+        let entry = toolCalls.get(idx);
+        if (!entry) {
+          // Create the slot on first sight; compatible servers may repeat or omit the id later.
+          entry = { id: tc.id ?? `tool-call-${idx}`, name: tc.function?.name ?? '', args: '' };
+          toolCalls.set(idx, entry);
+          yield { type: 'tool-call-start', id: entry.id, name: entry.name };
+        } else if (!entry.name && tc.function?.name) {
+          entry.name = tc.function.name;
+        }
+        if (tc.function?.arguments) {
+          entry.args += tc.function.arguments;
+          yield { type: 'tool-call-delta', id: entry.id, argumentsDelta: tc.function.arguments };
+        }
+      }
+      if (choice.finish_reason && finishReason === undefined) {
+        finishReason = choice.finish_reason;
+        for (const [, tc] of toolCalls) {
+          yield { type: 'tool-call-end', id: tc.id };
+        }
+      }
+    }
+    if (finishReason === undefined) return;
+    const tcArray: ToolCallRequest[] = [...toolCalls.values()].map((t) => ({
+      id: t.id, name: t.name, arguments: t.args,
+    }));
+    yield {
+      type: 'finish',
+      result: {
+        content: fullText,
+        toolCalls: tcArray.length ? tcArray : undefined,
+        usage,
+        // Same mapping as the non-streaming path.
+        finishReason: tcArray.length ? 'tool_calls' : finishReason === 'length' ? 'length' : 'stop',
+      },
+    };
+  }
+  /** Sends the JSON body to the chat-completions endpoint and rejects non-2xx answers. */
+  private async post(body: Record<string, unknown>, signal: ModelInvocation['signal']): Promise<Response> {
+    const res = await fetch(`${this.baseUrl}/chat/completions`, {
+      method: 'POST',
+      headers: { 'Content-Type': 'application/json', 'Authorization': `Bearer ${this.apiKey}` },
+      body: JSON.stringify(body),
+      signal,
+    });
+    if (!res.ok) throw new Error(`OpenAI API error: ${res.status} ${await res.text()}`);
+    return res;
+  }
+  /** Yields the payload of every `data:` line of a server-sent-event response body. */
+  private async *readSseData(res: Response): AsyncGenerator<string> {
+    if (!res.body) throw new Error('OpenAI API error: response has no body');
+    const reader = res.body.getReader();
+    const decoder = new TextDecoder();
+    let buffer = '';
+    try {
+      while (true) {
+        const { done, value } = await reader.read();
+        if (done) break;
+        buffer += decoder.decode(value, { stream: true });
+        const lines = buffer.split('\n');
+        buffer = lines.pop() ?? '';
+        for (const line of lines) {
+          const payload = this.sseDataPayload(line);
+          if (payload) yield payload;
+        }
+      }
+      // Flush a final line that was not newline-terminated.
+      buffer += decoder.decode();
+      const last = this.sseDataPayload(buffer);
+      if (last) yield last;
+    } finally {
+      // Runs on normal completion, on errors, and when the consumer stops iterating early.
+      reader.releaseLock();
+    }
+  }
+  /** Extracts the payload of a `data:` line (tolerating CRLF); other lines yield `undefined`. */
+  private sseDataPayload(line: string): string | undefined {
+    const trimmed = line.trim();
+    if (!trimmed.startsWith('data:')) return undefined;
+    return trimmed.slice(5).trim() || undefined;
+  }
+  /** Builds the chat-completions request body from the provider-neutral invocation. */
+  private buildBody(input: ModelInvocation, stream: boolean): Record<string, unknown> {
+    const messages = input.messages.map((m) => {
+      if (m.role === 'tool') return { role: 'tool', content: m.content, tool_call_id: m.toolCallId };
+      // An empty `tool_calls` array is rejected by the API, so only attach it when non-empty.
+      if (m.toolCalls?.length) return { role: 'assistant', content: m.content || null, tool_calls: m.toolCalls.map((tc) => ({ id: tc.id, type: 'function', function: { name: tc.name, arguments: tc.arguments } })) };
+      return { role: m.role, content: m.content };
+    });
+    const body: Record<string, unknown> = { model: input.model, messages, stream };
+    if (input.tools?.length) {
+      body.tools = input.tools.map((t) => ({ type: 'function', function: { name: t.name, description: t.description, parameters: t.parameters } }));
+    }
+    if (input.temperature !== undefined) body.temperature = input.temperature;
+    if (input.maxTokens) body.max_tokens = input.maxTokens;
+    if (input.jsonMode) body.response_format = { type: 'json_object' };
+    // Ask for a trailing usage chunk so streamed results can report token counts.
+    if (stream) body.stream_options = { include_usage: true };
+    return body;
+  }
+  /**
+   * Maps a non-streaming chat-completions response to a `ModelResult`.
+   * @throws Error when the response contains no choice with a message.
+   */
+  private parseResponse(data: any): ModelResult {
+    const choice = data?.choices?.[0];
+    if (!choice?.message) throw new Error('OpenAI API error: response contained no choices');
+    const toolCalls: ToolCallRequest[] | undefined = choice.message.tool_calls?.map((tc: any) => ({
+      id: tc.id, name: tc.function.name, arguments: tc.function.arguments,
+    }));
+    const usage: TokenUsage = {
+      promptTokens: data.usage?.prompt_tokens ?? 0,
+      completionTokens: data.usage?.completion_tokens ?? 0,
+      totalTokens: data.usage?.total_tokens ?? 0,
+    };
+    return {
+      content: choice.message.content ?? '',
+      toolCalls,
+      usage,
+      finishReason: toolCalls?.length ? 'tool_calls' : choice.finish_reason === 'length' ? 'length' : 'stop',
+    };
+  }
+}

src/providers/openrouter/index.ts ADDED Viewed

	@@ -0,0 +1,48 @@

+// ─── OpenAI-Compatible Provider (OpenRouter, local, etc.) ───────────────────
+import { OpenAIProvider } from '../openai/index.js';
+import type { ProviderConfig, ModelInfo } from '../../core/provider/index.js';
+/**
+ * Adapter for any endpoint that speaks the OpenAI chat-completions protocol.
+ * Reuses the OpenAI request/response handling and only swaps identity and model discovery.
+ */
+export class OpenAICompatibleProvider extends OpenAIProvider {
+  override id: string;
+  override label: string;
+  /** Takes the provider id from the config; the label defaults to that id. */
+  constructor(config: ProviderConfig & { label?: string }) {
+    super(config);
+    const { id, label } = config;
+    this.id = id;
+    this.label = label ?? id;
+  }
+  /**
+   * Queries `{baseUrl}/models` for the available models.
+   * Best effort: a non-2xx answer or any thrown error results in an empty list.
+   */
+  override async listModels(): Promise<ModelInfo[]> {
+    try {
+      // baseUrl / apiKey are private on the base class, hence the cast.
+      const { baseUrl, apiKey } = this as any;
+      const response = await fetch(`${baseUrl}/models`, {
+        headers: { 'Authorization': `Bearer ${apiKey}` },
+      });
+      if (response.ok) {
+        const payload = await response.json() as any;
+        const entries = payload.data ?? [];
+        return entries.map((entry: any) => ({
+          id: entry.id,
+          name: entry.id,
+          provider: this.id,
+          contextWindow: entry.context_length ?? 128000,
+          capabilities: ['streaming', 'tool-calling'] as any[],
+        }));
+      }
+    } catch {
+      // Deliberately ignored: discovery failures fall through to the empty list below.
+    }
+    return [];
+  }
+}
+// ─── OpenRouter convenience subclass ────────────────────────────────────────
+/** OpenAI-compatible provider preconfigured with OpenRouter's id, label, endpoint, and API-key variable. */
+export class OpenRouterProvider extends OpenAICompatibleProvider {
+  /** Caller-supplied `baseUrl` / `apiKey` win; otherwise the OpenRouter endpoint and `OPENROUTER_API_KEY` are used. */
+  constructor(config: Omit<ProviderConfig, 'id'>) {
+    const baseUrl = config.baseUrl ?? 'https://openrouter.ai/api/v1';
+    const apiKey = config.apiKey ?? process.env['OPENROUTER_API_KEY'] ?? '';
+    const merged = { ...config, id: 'openrouter', baseUrl, apiKey, label: 'OpenRouter' };
+    super(merged as any);
+  }
+}

src/skills/coding/index.ts ADDED Viewed

	@@ -0,0 +1,36 @@

+// ─── Coding Skill ───────────────────────────────────────────────────────────
+import type { SkillModule } from '../../core/skills/index.js';
+/**
+ * Skill module registered under the id `coding`: an instruction pack for software-engineering
+ * tasks covering planning, code quality, verification, file operations, and communication.
+ */
+export const codingSkill: SkillModule = {
+  id: 'coding',
+  title: 'Software Engineering',
+  description: 'Write, refactor, test, and debug code across languages and frameworks.',
+  suggestedTools: ['read_file', 'write_file', 'list_directory', 'shell_exec'],
+  tags: ['code', 'dev', 'engineering'],
+  instructions: `You are an expert software engineer. Follow these rules:
+## Planning
+- Break complex tasks into subtasks. Plan before coding.
+- State assumptions explicitly before implementing.
+## Code Quality
+- Write clean, typed, well-documented code.
+- Follow existing project conventions (formatting, naming, structure).
+- Prefer small, focused functions over large monoliths.
+- Add error handling for all I/O operations.
+## Verification
+- After writing code, run the test suite or relevant checks.
+- If tests fail, read the error, diagnose the root cause, and fix it.
+- Do not declare success without verification.
+## File Operations
+- Read files before modifying them to understand context.
+- Make minimal targeted edits rather than rewriting entire files.
+- Create new files when the change is substantial.
+## Communication
+- Explain your reasoning concisely.
+- Show relevant code snippets in your response.
+- Report test results and any remaining issues.`,
+};

src/skills/docs/index.ts ADDED Viewed

	@@ -0,0 +1,37 @@

+// ─── Docs Skill ─────────────────────────────────────────────────────────────
+import type { SkillModule } from '../../core/skills/index.js';
+/**
+ * Skill module registered under the id `docs`: an instruction pack for technical writing
+ * covering structure, clarity, code examples, completeness, and format.
+ */
+export const docsSkill: SkillModule = {
+  id: 'docs',
+  title: 'Documentation',
+  description: 'Write clear, structured technical documentation, READMEs, guides, and API references.',
+  suggestedTools: ['read_file', 'write_file', 'list_directory'],
+  tags: ['docs', 'writing', 'technical-writing'],
+  instructions: `You are an expert technical writer. Follow these rules:
+## Structure
+- Use clear hierarchical headings (h1 for title, h2 for sections, h3 for subsections).
+- Start with a brief overview/summary before diving into details.
+- Include a table of contents for documents longer than 3 sections.
+## Clarity
+- Write for the target audience (developers, users, or operators).
+- Define terms on first use.
+- Use active voice and present tense.
+- Keep sentences short and paragraphs focused.
+## Code Examples
+- Include working code examples for every API or feature.
+- Show both minimal and realistic usage patterns.
+- Annotate non-obvious lines with comments.
+## Completeness
+- Cover: what it is, why to use it, how to install, how to use, configuration, troubleshooting.
+- Include prerequisites and environment requirements.
+- Document error conditions and edge cases.
+## Format
+- Use Markdown with consistent formatting.
+- Use tables for structured comparisons.
+- Use admonitions (> **Note:**, > **Warning:**) for important callouts.`,
+};

src/skills/index.ts ADDED Viewed

	@@ -0,0 +1,4 @@

+// ─── Skills barrel export ────────────────────────────────────────────────────
+export { codingSkill } from './coding/index.js';
+export { researchSkill } from './research/index.js';
+export { docsSkill } from './docs/index.js';

src/skills/research/index.ts ADDED Viewed

	@@ -0,0 +1,33 @@

+// ─── Research Skill ──────────────────────────────────────────────────────────
+import type { SkillModule } from '../../core/skills/index.js';
+/**
+ * Skill module registered under the id `research`: an instruction pack for information
+ * gathering covering methodology, output structure, web research, and depth.
+ */
+export const researchSkill: SkillModule = {
+  id: 'research',
+  title: 'Research & Analysis',
+  description: 'Gather information from web sources, documentation, and APIs to answer questions or inform decisions.',
+  suggestedTools: ['web_fetch', 'read_file'],
+  tags: ['research', 'analysis', 'information'],
+  instructions: `You are a thorough researcher. Follow these rules:
+## Methodology
+- Start with the primary source (official docs, original paper, authoritative API).
+- Cross-reference multiple sources for claims that matter.
+- Distinguish facts from opinions and speculation.
+- Note when information may be outdated.
+## Output
+- Structure findings with clear headings and bullet points.
+- Cite sources with URLs when available.
+- Highlight key findings, contradictions, and gaps.
+- Provide a summary with confidence level for each major claim.
+## Web Research
+- Fetch documentation pages and extract relevant sections.
+- Do not hallucinate URLs or content you haven't fetched.
+- If a page is unavailable, note it and try alternatives.
+## Depth
+- For technical questions, go to the source code or spec.
+- For market/product questions, find multiple data points.
+- Always answer the actual question, not adjacent ones.`,
+};

src/tools/fs/index.ts ADDED Viewed

	@@ -0,0 +1,73 @@

+// ─── Filesystem Tool ────────────────────────────────────────────────────────
+import { z } from 'zod';
+import { readFile, writeFile, readdir, stat, mkdir } from 'fs/promises';
+import { join, resolve } from 'path';
+import type { ToolDef } from '../../core/tools/index.js';
+/**
+ * Tool that reads a file as UTF-8 text and reports its size in bytes.
+ *
+ * `input.path` is resolved against `ctx.workDir`, so a relative path is taken
+ * relative to the working directory and an absolute path is used as given.
+ *
+ * Returns `{ content, size }` where `size` is the byte length of exactly the
+ * data that was decoded into `content`. Rejects with the underlying `fs` error
+ * when the file cannot be read (missing file, directory, permissions, ...).
+ */
+export const readFileTool: ToolDef<{ path: string }, { content: string; size: number }> = {
+  id: 'fs.read',
+  name: 'read_file',
+  description: 'Read the contents of a file at the given path. Returns the text content and file size in bytes.',
+  inputSchema: z.object({ path: z.string().describe('Absolute or relative file path to read') }),
+  outputSchema: z.object({ content: z.string(), size: z.number() }),
+  permission: 'read',
+  sideEffect: 'none',
+  timeout: 10000,
+  retries: 0,
+  tags: ['filesystem'],
+  renderer: { icon: '📄', color: 'blue' },
+  async execute(input, ctx) {
+    const filePath = resolve(ctx.workDir, input.path);
+    // Read the raw bytes once and derive both fields from the same buffer.
+    // A separate stat() call could observe a different file state (the file may
+    // change between the two calls) and reports 0 for some special files, which
+    // would make `size` disagree with `content`.
+    const raw = await readFile(filePath);
+    return { content: raw.toString('utf-8'), size: raw.byteLength };
+  },
+};
+/**
+ * Tool that writes UTF-8 text to a file, replacing any existing content.
+ *
+ * `input.path` is resolved against `ctx.workDir`. The file's parent directory
+ * (and any missing ancestors) is created before writing.
+ *
+ * Returns `{ written: true, path }` where `path` is the resolved absolute path.
+ * Rejects with the underlying `fs` error if the directory cannot be created or
+ * the file cannot be written.
+ */
+export const writeFileTool: ToolDef<{ path: string; content: string }, { written: boolean; path: string }> = {
+  id: 'fs.write',
+  name: 'write_file',
+  description: 'Write content to a file. Creates parent directories if needed. Overwrites existing content.',
+  inputSchema: z.object({
+    path: z.string().describe('File path to write to'),
+    content: z.string().describe('Content to write'),
+  }),
+  outputSchema: z.object({ written: z.boolean(), path: z.string() }),
+  permission: 'write',
+  sideEffect: 'filesystem',
+  timeout: 10000,
+  retries: 0,
+  tags: ['filesystem'],
+  renderer: { icon: '✏️', color: 'yellow' },
+  async execute(input, ctx) {
+    const filePath = resolve(ctx.workDir, input.path);
+    // Derive the parent directory with the path module rather than searching
+    // for '/': string slicing yields '' for a file directly under the root and
+    // breaks on Windows, where resolve() returns backslash-separated paths.
+    const parentDir = resolve(filePath, '..');
+    await mkdir(parentDir, { recursive: true });
+    await writeFile(filePath, input.content, 'utf-8');
+    return { written: true, path: filePath };
+  },
+};
+/**
+ * Tool that lists the entries of a directory.
+ *
+ * `input.path` is resolved against `ctx.workDir`. Each entry is reported with
+ * its name, a `type` of `'directory'` or `'file'` (anything that is not a
+ * directory is reported as `'file'`), and its size from `stat`. If `stat`
+ * fails for an entry, that entry's size is reported as 0 instead of failing
+ * the whole listing.
+ */
+export const listDirTool: ToolDef<{ path: string }, { entries: Array<{ name: string; type: string; size: number }> }> = {
+  id: 'fs.list',
+  name: 'list_directory',
+  description: 'List files and directories at the given path with type and size information.',
+  inputSchema: z.object({ path: z.string().describe('Directory path to list') }),
+  outputSchema: z.object({ entries: z.array(z.object({ name: z.string(), type: z.string(), size: z.number() })) }),
+  permission: 'read',
+  sideEffect: 'none',
+  timeout: 10000,
+  retries: 0,
+  tags: ['filesystem'],
+  renderer: { icon: '📁', color: 'blue' },
+  async execute(input, ctx) {
+    const root = resolve(ctx.workDir, input.path);
+    const dirents = await readdir(root, { withFileTypes: true });
+    // Start every stat() up front so the lookups run concurrently.
+    const pending = dirents.map(async (dirent) => {
+      const entryPath = join(root, dirent.name);
+      let size = 0;
+      try {
+        size = (await stat(entryPath)).size;
+      } catch {
+        // stat failed for this entry; keep the size at 0.
+      }
+      const type = dirent.isDirectory() ? 'directory' : 'file';
+      return { name: dirent.name, type, size };
+    });
+    const entries = await Promise.all(pending);
+    return { entries };
+  },
+};

src/tools/index.ts ADDED Viewed

	@@ -0,0 +1,4 @@

+// ─── Tools barrel export ─────────────────────────────────────────────────────
+export { readFileTool, writeFileTool, listDirTool } from './fs/index.js';
+export { shellExecTool } from './shell/index.js';
+export { webFetchTool } from './web/index.js';

src/tools/shell/index.ts ADDED Viewed

	@@ -0,0 +1,50 @@

+// ─── Shell Tool ─────────────────────────────────────────────────────────────
+import { z } from 'zod';
+import { exec } from 'child_process';
+import { promisify } from 'util';
+import type { ToolDef } from '../../core/tools/index.js';
+/** Promise-returning wrapper around `child_process.exec`. */
+const execAsync = promisify(exec);
+/**
+ * Fields read from the value a failed `execAsync` call rejects with.
+ * Everything is optional because a rejection value is untyped.
+ */
+interface ShellExecFailure {
+  stdout?: string;
+  stderr?: string;
+  message?: string;
+  code?: unknown;
+}
+/**
+ * Tool that runs a shell command in `ctx.workDir` and captures its output.
+ *
+ * The command runs with a per-call timeout (`input.timeout`, default 30000 ms),
+ * a 10 MB output buffer limit and `ctx.signal` for cancellation.
+ *
+ * Never rejects: on success it returns the output with `exitCode: 0`; on any
+ * failure it returns whatever output is available together with a numeric
+ * exit code. Returned `stdout` is cut to the first 50000 characters and
+ * `stderr` to the first 10000.
+ */
+export const shellExecTool: ToolDef<
+  { command: string; timeout?: number },
+  { stdout: string; stderr: string; exitCode: number }
+> = {
+  id: 'shell.exec',
+  name: 'shell_exec',
+  description: 'Execute a shell command and return stdout, stderr, and exit code. Use for running builds, tests, git commands, or any CLI tool.',
+  inputSchema: z.object({
+    command: z.string().describe('Shell command to execute'),
+    timeout: z.number().optional().describe('Timeout in milliseconds (default: 30000)'),
+  }),
+  outputSchema: z.object({
+    stdout: z.string(),
+    stderr: z.string(),
+    exitCode: z.number(),
+  }),
+  permission: 'exec',
+  sideEffect: 'process',
+  timeout: 60000,
+  retries: 0,
+  tags: ['shell', 'exec'],
+  renderer: { icon: '⚡', color: 'green' },
+  async execute(input, ctx) {
+    ctx.emit(`Executing: ${input.command}`);
+    const timeout = input.timeout ?? 30000;
+    try {
+      const { stdout, stderr } = await execAsync(input.command, {
+        cwd: ctx.workDir,
+        timeout,
+        maxBuffer: 1024 * 1024 * 10, // 10MB
+        signal: ctx.signal,
+      });
+      return { stdout: stdout.slice(0, 50000), stderr: stderr.slice(0, 10000), exitCode: 0 };
+    } catch (err: unknown) {
+      const failure: ShellExecFailure =
+        typeof err === 'object' && err !== null ? (err as ShellExecFailure) : {};
+      return {
+        stdout: (failure.stdout ?? '').slice(0, 50000),
+        stderr: (failure.stderr ?? failure.message ?? '').slice(0, 10000),
+        // `code` is the numeric exit status for an ordinary non-zero exit, but
+        // it is a string (e.g. 'ABORT_ERR', 'ERR_CHILD_PROCESS_STDIO_MAXBUFFER',
+        // 'ENOENT') or null for aborts, buffer overflows, spawn errors and
+        // timeouts. Only a number satisfies the output schema, so fall back to 1.
+        exitCode: typeof failure.code === 'number' ? failure.code : 1,
+      };
+    }
+  },
+};

src/tools/web/index.ts ADDED Viewed

	@@ -0,0 +1,46 @@

+// ─── Web Fetch Tool ─────────────────────────────────────────────────────────
+import { z } from 'zod';
+import type { ToolDef } from '../../core/tools/index.js';
+/**
+ * Tool that performs a single HTTP request with `fetch`.
+ *
+ * Sends `input.url` using `input.method` (GET when omitted) with the optional
+ * request headers and body, and passes `ctx.signal` through for cancellation.
+ *
+ * Returns the response status, the response headers flattened into a plain
+ * object, and the response text cut to its first 100,000 characters. The
+ * response status is returned as-is and not inspected here; errors thrown by
+ * `fetch` or by reading the body propagate to the caller.
+ */
+export const webFetchTool: ToolDef<
+  { url: string; method?: string; headers?: Record<string, string>; body?: string },
+  { status: number; headers: Record<string, string>; body: string }
+> = {
+  id: 'web.fetch',
+  name: 'web_fetch',
+  description: 'Make an HTTP request to a URL. Returns status, headers, and body (truncated to 100KB). Useful for reading docs, APIs, and web pages.',
+  inputSchema: z.object({
+    url: z.string().url().describe('URL to fetch'),
+    method: z.string().optional().describe('HTTP method (default: GET)'),
+    headers: z.record(z.string()).optional().describe('Request headers'),
+    body: z.string().optional().describe('Request body for POST/PUT'),
+  }),
+  outputSchema: z.object({
+    status: z.number(),
+    headers: z.record(z.string()),
+    body: z.string(),
+  }),
+  permission: 'network',
+  sideEffect: 'network',
+  timeout: 30000,
+  retries: 1,
+  tags: ['web', 'network', 'http'],
+  renderer: { icon: '🌐', color: 'cyan' },
+  async execute(input, ctx) {
+    ctx.emit(`Fetching: ${input.url}`);
+    // Assemble the request options first so the fetch call itself stays short.
+    const requestOptions = {
+      method: input.method ?? 'GET',
+      headers: input.headers,
+      body: input.body,
+      signal: ctx.signal,
+    };
+    const response = await fetch(input.url, requestOptions);
+    const text = await response.text();
+    // Copy the response headers into a plain object.
+    const responseHeaders: Record<string, string> = {};
+    response.headers.forEach((headerValue, headerName) => {
+      responseHeaders[headerName] = headerValue;
+    });
+    const truncated = text.slice(0, 100_000);
+    return { status: response.status, headers: responseHeaders, body: truncated };
+  },
+};

tsconfig.json ADDED Viewed

	@@ -0,0 +1,31 @@

+{
+  "compilerOptions": {
+    "target": "ES2022",
+    "module": "ESNext",
+    "moduleResolution": "bundler",
+    "lib": ["ES2022"],
+    "outDir": "./dist",
+    "rootDir": "./src",
+    "strict": true,
+    "esModuleInterop": true,
+    "skipLibCheck": true,
+    "forceConsistentCasingInFileNames": true,
+    "resolveJsonModule": true,
+    "declaration": true,
+    "declarationMap": true,
+    "sourceMap": true,
+    "noUncheckedIndexedAccess": true,
+    "noUnusedLocals": true,
+    "noUnusedParameters": true,
+    "exactOptionalPropertyTypes": false,
+    "paths": {
+      "@core/*": ["./src/core/*"],
+      "@providers/*": ["./src/providers/*"],
+      "@cli/*": ["./src/cli/*"],
+      "@tools/*": ["./src/tools/*"],
+      "@skills/*": ["./src/skills/*"]
+    }
+  },
+  "include": ["src/**/*.ts"],
+  "exclude": ["node_modules", "dist"]
+}