Initial AI Harness - production-grade model-agnostic CLI agent runtime
Browse files
- EXTENSION_GUIDE.md +206 -0
- README.md +133 -17
- package.json +42 -0
- src/cli/commands/chat.ts +62 -0
- src/cli/commands/config.ts +27 -0
- src/cli/commands/providers.ts +23 -0
- src/cli/commands/run.ts +96 -0
- src/cli/commands/skills.ts +19 -0
- src/cli/commands/tools.ts +24 -0
- src/cli/index.ts +65 -0
- src/cli/renderers/index.ts +208 -0
- src/cli/state/factory.ts +25 -0
- src/cli/state/provider-resolver.ts +27 -0
- src/core/artifacts/index.ts +48 -0
- src/core/evaluators/index.ts +75 -0
- src/core/events/index.ts +129 -0
- src/core/index.ts +10 -0
- src/core/observability/index.ts +82 -0
- src/core/policy/index.ts +55 -0
- src/core/provider/index.ts +99 -0
- src/core/runtime/index.ts +279 -0
- src/core/skills/index.ts +39 -0
- src/core/tools/index.ts +113 -0
- src/providers/anthropic/index.ts +159 -0
- src/providers/gemini/index.ts +130 -0
- src/providers/index.ts +5 -0
- src/providers/openai/index.ts +148 -0
- src/providers/openrouter/index.ts +48 -0
- src/skills/coding/index.ts +36 -0
- src/skills/docs/index.ts +37 -0
- src/skills/index.ts +4 -0
- src/skills/research/index.ts +33 -0
- src/tools/fs/index.ts +73 -0
- src/tools/index.ts +4 -0
- src/tools/shell/index.ts +50 -0
- src/tools/web/index.ts +46 -0
- tsconfig.json +31 -0
EXTENSION_GUIDE.md
ADDED
|
@@ -0,0 +1,206 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Extension Guide
|
| 2 |
+
|
| 3 |
+
This guide explains how to extend AI Harness with new providers, tools, skills, evaluator checks, and renderers.
|
| 4 |
+
|
| 5 |
+
## Adding a New Provider
|
| 6 |
+
|
| 7 |
+
1. Create `src/providers/your-provider/index.ts`
|
| 8 |
+
2. Implement the `ProviderAdapter` interface:
|
| 9 |
+
|
| 10 |
+
```typescript
|
| 11 |
+
import type { ProviderAdapter, ProviderConfig, ModelInfo, ModelCapability, ModelInvocation, ModelResult, ModelStreamEvent } from '../../core/provider/index.js';
|
| 12 |
+
|
| 13 |
+
export class YourProvider implements ProviderAdapter {
|
| 14 |
+
id = 'your-provider';
|
| 15 |
+
label = 'Your Provider';
|
| 16 |
+
|
| 17 |
+
constructor(config: ProviderConfig) {
|
| 18 |
+
// Store API key, base URL, etc.
|
| 19 |
+
}
|
| 20 |
+
|
| 21 |
+
async listModels(): Promise<ModelInfo[]> {
|
| 22 |
+
// Return available models with capabilities and pricing
|
| 23 |
+
}
|
| 24 |
+
|
| 25 |
+
supports(capability: ModelCapability): boolean {
|
| 26 |
+
// Return true for supported capabilities
|
| 27 |
+
}
|
| 28 |
+
|
| 29 |
+
async invoke(input: ModelInvocation): Promise<ModelResult> {
|
| 30 |
+
// Make a non-streaming request, return parsed result
|
| 31 |
+
}
|
| 32 |
+
|
| 33 |
+
async *stream(input: ModelInvocation): AsyncIterable<ModelStreamEvent> {
|
| 34 |
+
// Yield streaming events: text-delta, tool-call-start, tool-call-delta, tool-call-end, finish
|
| 35 |
+
}
|
| 36 |
+
}
|
| 37 |
+
```
|
| 38 |
+
|
| 39 |
+
3. Register in `src/cli/state/provider-resolver.ts`:
|
| 40 |
+
|
| 41 |
+
```typescript
|
| 42 |
+
case 'your-provider':
|
| 43 |
+
return new YourProvider({ id: 'your-provider', ...config });
|
| 44 |
+
```
|
| 45 |
+
|
| 46 |
+
### Key requirements:
|
| 47 |
+
- Normalize all message formats to the common `Message` type
|
| 48 |
+
- Handle tool calling format differences internally
|
| 49 |
+
- Emit proper `TokenUsage` in results (even if estimated)
|
| 50 |
+
- Support `AbortSignal` for cancellation
|
| 51 |
+
- Handle rate limits and retries internally
|
| 52 |
+
|
| 53 |
+
---
|
| 54 |
+
|
| 55 |
+
## Adding a New Tool
|
| 56 |
+
|
| 57 |
+
1. Create `src/tools/your-category/index.ts`
|
| 58 |
+
2. Define the tool with full typing:
|
| 59 |
+
|
| 60 |
+
```typescript
|
| 61 |
+
import { z } from 'zod';
|
| 62 |
+
import type { ToolDef } from '../../core/tools/index.js';
|
| 63 |
+
|
| 64 |
+
export const yourTool: ToolDef<
|
| 65 |
+
{ param1: string; param2?: number },
|
| 66 |
+
{ result: string }
|
| 67 |
+
> = {
|
| 68 |
+
id: 'category.action',
|
| 69 |
+
name: 'your_tool_name', // This is what the model sees
|
| 70 |
+
description: 'Clear, concise description of what this tool does.',
|
| 71 |
+
inputSchema: z.object({
|
| 72 |
+
param1: z.string().describe('What this parameter does'),
|
| 73 |
+
param2: z.number().optional().describe('Optional parameter'),
|
| 74 |
+
}),
|
| 75 |
+
outputSchema: z.object({
|
| 76 |
+
result: z.string(),
|
| 77 |
+
}),
|
| 78 |
+
permission: 'read', // read | write | exec | network | dangerous
|
| 79 |
+
sideEffect: 'none', // none | filesystem | network | process | mixed
|
| 80 |
+
timeout: 10000, // ms
|
| 81 |
+
retries: 1, // number of automatic retries on failure
|
| 82 |
+
tags: ['your-category'],
|
| 83 |
+
renderer: { icon: '🔧', color: 'blue' },
|
| 84 |
+
async execute(input, ctx) {
|
| 85 |
+
ctx.emit('Starting execution...'); // Progress updates
|
| 86 |
+
// Do the work
|
| 87 |
+
return { result: 'done' };
|
| 88 |
+
},
|
| 89 |
+
};
|
| 90 |
+
```
|
| 91 |
+
|
| 92 |
+
3. Register in the runtime setup (e.g., `src/cli/commands/run.ts`):
|
| 93 |
+
|
| 94 |
+
```typescript
|
| 95 |
+
import { yourTool } from '../../tools/your-category/index.js';
|
| 96 |
+
tools.register(yourTool);
|
| 97 |
+
```
|
| 98 |
+
|
| 99 |
+
### Permission levels:
|
| 100 |
+
| Level | Meaning | Default policy |
|
| 101 |
+
|-------|---------|----------------|
|
| 102 |
+
| `read` | Only reads state | Auto-approved |
|
| 103 |
+
| `write` | Modifies files/state | Confirm in `confirm-writes` mode |
|
| 104 |
+
| `exec` | Runs processes | Confirm in `confirm-writes` mode |
|
| 105 |
+
| `network` | Makes network requests | Confirm in `confirm-network` mode |
|
| 106 |
+
| `dangerous` | Destructive/irreversible | Always requires approval |
|
| 107 |
+
|
| 108 |
+
---
|
| 109 |
+
|
| 110 |
+
## Adding a New Skill
|
| 111 |
+
|
| 112 |
+
1. Create `src/skills/your-skill/index.ts`
|
| 113 |
+
|
| 114 |
+
```typescript
|
| 115 |
+
import type { SkillModule } from '../../core/skills/index.js';
|
| 116 |
+
|
| 117 |
+
export const yourSkill: SkillModule = {
|
| 118 |
+
id: 'your-skill',
|
| 119 |
+
title: 'Your Skill Title',
|
| 120 |
+
description: 'One-line description.',
|
| 121 |
+
suggestedTools: ['tool_name_1', 'tool_name_2'],
|
| 122 |
+
tags: ['tag1', 'tag2'],
|
| 123 |
+
instructions: `Detailed multi-line instructions that will be injected into the system prompt when this skill is active.
|
| 124 |
+
|
| 125 |
+
## Section 1
|
| 126 |
+
- Rule 1
|
| 127 |
+
- Rule 2
|
| 128 |
+
|
| 129 |
+
## Section 2
|
| 130 |
+
- Rule 3`,
|
| 131 |
+
};
|
| 132 |
+
```
|
| 133 |
+
|
| 134 |
+
2. Register in the skill registry and export from `src/skills/index.ts`.
|
| 135 |
+
|
| 136 |
+
### Tips:
|
| 137 |
+
- Keep instructions focused and actionable
|
| 138 |
+
- Reference specific tool names the model should use
|
| 139 |
+
- Include both "do" and "don't" rules
|
| 140 |
+
- Structure with markdown headings for readability
|
| 141 |
+
|
| 142 |
+
---
|
| 143 |
+
|
| 144 |
+
## Adding an Evaluator Check
|
| 145 |
+
|
| 146 |
+
```typescript
|
| 147 |
+
import type { EvalCheck } from '../../core/evaluators/index.js';
|
| 148 |
+
|
| 149 |
+
export const yourCheck: EvalCheck = {
|
| 150 |
+
name: 'your-check-name',
|
| 151 |
+
async run(ctx) {
|
| 152 |
+
// ctx.goal — the original task goal
|
| 153 |
+
// ctx.assistantOutput — all assistant messages concatenated
|
| 154 |
+
// ctx.artifacts — generated artifacts
|
| 155 |
+
// ctx.workDir — working directory
|
| 156 |
+
|
| 157 |
+
const passed = /* your logic */;
|
| 158 |
+
return {
|
| 159 |
+
passed,
|
| 160 |
+
message: passed ? undefined : 'Explanation of what failed',
|
| 161 |
+
};
|
| 162 |
+
},
|
| 163 |
+
};
|
| 164 |
+
|
| 165 |
+
// Register:
|
| 166 |
+
evaluator.addCheck(yourCheck);
|
| 167 |
+
```
|
| 168 |
+
|
| 169 |
+
### Common check patterns:
|
| 170 |
+
- **Schema validation** — parse output with Zod
|
| 171 |
+
- **File existence** — verify expected files were created
|
| 172 |
+
- **Test execution** — run `npm test` and check exit code
|
| 173 |
+
- **Content matching** — verify output contains required elements
|
| 174 |
+
- **Length/quality** — check response isn't too short or repetitive
|
| 175 |
+
|
| 176 |
+
---
|
| 177 |
+
|
| 178 |
+
## Custom Renderer
|
| 179 |
+
|
| 180 |
+
The `EventRenderer` class in `src/cli/renderers/index.ts` handles all terminal output. To customize:
|
| 181 |
+
|
| 182 |
+
1. Subclass or modify `EventRenderer`
|
| 183 |
+
2. Add cases for event types you want to render differently
|
| 184 |
+
3. Use the box-drawing utilities for structured output
|
| 185 |
+
|
| 186 |
+
```typescript
|
| 187 |
+
class CustomRenderer extends EventRenderer {
|
| 188 |
+
override render(event: HarnessEvent): void {
|
| 189 |
+
if (event.type === 'your.custom.event') {
|
| 190 |
+
// Custom rendering
|
| 191 |
+
return;
|
| 192 |
+
}
|
| 193 |
+
super.render(event);
|
| 194 |
+
}
|
| 195 |
+
}
|
| 196 |
+
```
|
| 197 |
+
|
| 198 |
+
---
|
| 199 |
+
|
| 200 |
+
## Adding Custom Events
|
| 201 |
+
|
| 202 |
+
1. Add your event type to the `HarnessEvent` union in `src/core/events/index.ts`
|
| 203 |
+
2. Emit it via `eventBus.emit({ type: 'your.event', ... })`
|
| 204 |
+
3. Handle it in the renderer
|
| 205 |
+
|
| 206 |
+
The event system is intentionally open — any component can emit events, and any number of listeners can consume them.
|
README.md
CHANGED
|
@@ -1,26 +1,142 @@
|
|
| 1 |
-
|
| 2 |
-
tags:
|
| 3 |
-
- ml-intern
|
| 4 |
-
---
|
| 5 |
|
| 6 |
-
|
| 7 |
|
| 8 |
-
|
| 9 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 10 |
|
| 11 |
-
|
| 12 |
|
| 13 |
-
|
| 14 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 15 |
|
| 16 |
-
##
|
| 17 |
|
| 18 |
-
|
| 19 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 20 |
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 24 |
```
|
| 25 |
|
| 26 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# AI Harness
|
|
|
|
|
|
|
|
|
|
| 2 |
|
| 3 |
+
A production-grade, model-agnostic CLI harness for agentic AI workflows.
|
| 4 |
|
| 5 |
+
```
|
| 6 |
+
╭─────────────────────────────────────╮
|
| 7 |
+
│ ⚡ AI Harness v0.1.0 │
|
| 8 |
+
│ model-agnostic CLI agent runtime │
|
| 9 |
+
╰─────────────────────────────────────╯
|
| 10 |
+
```
|
| 11 |
+
|
| 12 |
+
## What is this?
|
| 13 |
+
|
| 14 |
+
A terminal-first agent runtime. Not a toy chatbot. It supports:
|
| 15 |
+
|
| 16 |
+
- **Multiple LLM providers** — OpenAI, Anthropic, Gemini, OpenRouter, any OpenAI-compatible endpoint
|
| 17 |
+
- **Typed tool calling** — Zod-validated inputs/outputs, permissions, retries, timeouts
|
| 18 |
+
- **Modular skills** — Attachable instruction packs per task
|
| 19 |
+
- **Structured runtime** — Planner/executor/evaluator roles, budgets, loop detection
|
| 20 |
+
- **Beautiful CLI output** — Streaming, spinners, panels, event timeline, metrics
|
| 21 |
+
- **Observability** — Token usage, cost tracking, latency, success rates
|
| 22 |
+
- **Evaluation** — Schema checks, rubric scoring, remediation loops
|
| 23 |
+
- **Artifact handling** — Files, patches, logs, export to Markdown/JSON
|
| 24 |
+
- **Safety & permissions** — Read/write/exec/network/dangerous levels with policy modes
|
| 25 |
+
|
| 26 |
+
## Quick Start
|
| 27 |
+
|
| 28 |
+
```bash
|
| 29 |
+
# Install dependencies
|
| 30 |
+
pnpm install
|
| 31 |
+
|
| 32 |
+
# Build
|
| 33 |
+
pnpm build
|
| 34 |
+
|
| 35 |
+
# Interactive chat
|
| 36 |
+
pnpm chat
|
| 37 |
+
|
| 38 |
+
# Autonomous task
|
| 39 |
+
node dist/cli/index.js run "refactor the auth module to use JWT"
|
| 40 |
+
|
| 41 |
+
# List providers/models
|
| 42 |
+
node dist/cli/index.js providers
|
| 43 |
+
|
| 44 |
+
# List tools
|
| 45 |
+
node dist/cli/index.js tools
|
| 46 |
+
|
| 47 |
+
# List skills
|
| 48 |
+
node dist/cli/index.js skills
|
| 49 |
+
```
|
| 50 |
+
|
| 51 |
+
## Configuration
|
| 52 |
|
| 53 |
+
Set provider API keys via environment variables:
|
| 54 |
|
| 55 |
+
```bash
|
| 56 |
+
export OPENAI_API_KEY="sk-..."
|
| 57 |
+
export ANTHROPIC_API_KEY="sk-ant-..."
|
| 58 |
+
export GEMINI_API_KEY="AI..."
|
| 59 |
+
export OPENROUTER_API_KEY="sk-or-..."
|
| 60 |
+
```
|
| 61 |
+
|
| 62 |
+
Override defaults with CLI flags:
|
| 63 |
+
|
| 64 |
+
```bash
|
| 65 |
+
harness chat --provider openai --model gpt-4o --skills coding research --verbose
|
| 66 |
+
harness run "build a REST API" --provider anthropic --model claude-sonnet-4-20250514 --budget-tokens 100000
|
| 67 |
+
```
|
| 68 |
|
| 69 |
+
## Commands
|
| 70 |
|
| 71 |
+
| Command | Description |
|
| 72 |
+
|---------|-------------|
|
| 73 |
+
| `harness chat` | Interactive multi-turn chat |
|
| 74 |
+
| `harness run <goal>` | Autonomous task execution |
|
| 75 |
+
| `harness providers` | List providers and models |
|
| 76 |
+
| `harness tools` | List available tools |
|
| 77 |
+
| `harness skills` | List available skills |
|
| 78 |
+
| `harness config` | Show configuration |
|
| 79 |
|
| 80 |
+
## Architecture
|
| 81 |
+
|
| 82 |
+
```
|
| 83 |
+
src/
|
| 84 |
+
core/
|
| 85 |
+
events/ — Event types, EventBus
|
| 86 |
+
provider/ — ProviderAdapter interface, message types
|
| 87 |
+
runtime/ — Session state, orchestration loop
|
| 88 |
+
tools/ — ToolRegistry, ToolDef, permissions
|
| 89 |
+
skills/ — SkillRegistry, SkillModule
|
| 90 |
+
evaluators/ — Evaluation checks, EvalReport
|
| 91 |
+
artifacts/ — ArtifactStore, export
|
| 92 |
+
policy/ — PolicyEngine, permission enforcement
|
| 93 |
+
observability/ — MetricsCollector, MetricEntry
|
| 94 |
+
providers/
|
| 95 |
+
openai/ — OpenAI adapter
|
| 96 |
+
anthropic/ — Anthropic adapter
|
| 97 |
+
gemini/ — Google Gemini adapter
|
| 98 |
+
openrouter/ — OpenRouter + OpenAI-compatible adapter
|
| 99 |
+
tools/
|
| 100 |
+
fs/ — read_file, write_file, list_directory
|
| 101 |
+
shell/ — shell_exec
|
| 102 |
+
web/ — web_fetch
|
| 103 |
+
skills/
|
| 104 |
+
coding/ — Software engineering instructions
|
| 105 |
+
research/ — Research & analysis instructions
|
| 106 |
+
docs/ — Technical writing instructions
|
| 107 |
+
cli/
|
| 108 |
+
index.ts — Commander entry point
|
| 109 |
+
commands/ — chat, run, providers, tools, skills, config
|
| 110 |
+
renderers/ — EventRenderer, Spinner, box drawing, metrics
|
| 111 |
+
state/ — Provider resolver, runtime factory
|
| 112 |
```
|
| 113 |
|
| 114 |
+
## Key Design Decisions
|
| 115 |
+
|
| 116 |
+
### Event-driven architecture
|
| 117 |
+
Everything flows through `EventBus`. Rendering, logging, metrics collection, and export all subscribe to the same event stream. This means you can add a new consumer (e.g., a web dashboard) without touching core logic.
|
| 118 |
+
|
| 119 |
+
### Provider normalization
|
| 120 |
+
All providers implement `ProviderAdapter` with `invoke()` and `stream()`. Message format, tool calling conventions, and response parsing are handled per-provider so the runtime never sees vendor-specific shapes.
|
| 121 |
+
|
| 122 |
+
### Typed tools with Zod
|
| 123 |
+
Every tool declares its input/output schemas with Zod. The runtime validates inputs before execution and can generate JSON Schema for model function-calling automatically.
|
| 124 |
+
|
| 125 |
+
### Policy enforcement
|
| 126 |
+
The `PolicyEngine` checks permission levels against the current policy mode before executing any tool. Denied tools return structured error messages to the model so it can adapt.
|
| 127 |
+
|
| 128 |
+
### Evaluation loop
|
| 129 |
+
After task completion, the `Evaluator` runs all registered checks. Failed checks can trigger remediation (retry with error context), preventing premature success declarations.
|
| 130 |
+
|
| 131 |
+
## Extending
|
| 132 |
+
|
| 133 |
+
See [EXTENSION_GUIDE.md](./EXTENSION_GUIDE.md) for detailed instructions on adding:
|
| 134 |
+
- New providers
|
| 135 |
+
- New tools
|
| 136 |
+
- New skills
|
| 137 |
+
- New evaluator checks
|
| 138 |
+
- Custom renderers
|
| 139 |
+
|
| 140 |
+
## License
|
| 141 |
+
|
| 142 |
+
MIT
|
package.json
ADDED
|
@@ -0,0 +1,42 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"name": "ai-harness",
|
| 3 |
+
"version": "0.1.0",
|
| 4 |
+
"private": true,
|
| 5 |
+
"type": "module",
|
| 6 |
+
"description": "Production-grade, model-agnostic CLI harness for agentic AI workflows",
|
| 7 |
+
"engines": { "node": ">=20.0.0" },
|
| 8 |
+
"scripts": {
|
| 9 |
+
"build": "tsc -b",
|
| 10 |
+
"dev": "tsc -b --watch",
|
| 11 |
+
"start": "node dist/cli/index.js",
|
| 12 |
+
"chat": "node dist/cli/index.js chat",
|
| 13 |
+
"run": "node dist/cli/index.js run",
|
| 14 |
+
"test": "vitest"
|
| 15 |
+
},
|
| 16 |
+
"dependencies": {
|
| 17 |
+
"zod": "^3.23.0",
|
| 18 |
+
"commander": "^12.1.0",
|
| 19 |
+
"chalk": "^5.3.0",
|
| 20 |
+
"ora": "^8.0.0",
|
| 21 |
+
"marked": "^12.0.0",
|
| 22 |
+
"marked-terminal": "^7.0.0",
|
| 23 |
+
"cli-table3": "^0.6.5",
|
| 24 |
+
"log-update": "^6.0.0",
|
| 25 |
+
"conf": "^13.0.0",
|
| 26 |
+
"better-sqlite3": "^11.0.0",
|
| 27 |
+
"nanoid": "^5.0.0",
|
| 28 |
+
"openai": "^4.52.0",
|
| 29 |
+
"@anthropic-ai/sdk": "^0.24.0",
|
| 30 |
+
"@google/generative-ai": "^0.12.0",
|
| 31 |
+
"eventsource-parser": "^1.1.0",
|
| 32 |
+
"undici": "^6.19.0",
|
| 33 |
+
"figures": "^6.1.0",
|
| 34 |
+
"boxen": "^8.0.0"
|
| 35 |
+
},
|
| 36 |
+
"devDependencies": {
|
| 37 |
+
"typescript": "^5.5.0",
|
| 38 |
+
"@types/node": "^20.14.0",
|
| 39 |
+
"@types/better-sqlite3": "^7.6.0",
|
| 40 |
+
"vitest": "^1.6.0"
|
| 41 |
+
}
|
| 42 |
+
}
|
src/cli/commands/chat.ts
ADDED
|
@@ -0,0 +1,62 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// ─── Chat Command ───────────────────────────────────────────────────────────
|
| 2 |
+
import { createInterface } from 'readline';
|
| 3 |
+
import { createRuntime } from '../state/factory.js';
|
| 4 |
+
import { EventRenderer } from '../renderers/index.js';
|
| 5 |
+
import type { Message } from '../../core/provider/index.js';
|
| 6 |
+
import { now, type HarnessEvent } from '../../core/events/index.js';
|
| 7 |
+
|
| 8 |
+
/**
 * Interactive multi-turn chat loop.
 *
 * Reads lines from stdin, streams the provider's reply to stdout and keeps the
 * running conversation in memory. The loop ends on `/quit`, `/exit`, or when
 * the input stream is closed (Ctrl+C / Ctrl+D), after which the process exits
 * with code 0.
 *
 * @param opts.provider Provider id handed to `createRuntime`.
 * @param opts.model    Model id; when omitted, the first model reported by
 *                      `provider.listModels()` is used (resolved once, lazily).
 * @param opts.skills   Skill ids handed to `createRuntime`.
 * @param opts.verbose  Renderer verbosity flag (defaults to `false`).
 * @param opts.compact  Renderer compact flag (defaults to `false`).
 */
export async function chatCommand(opts: {
  provider: string;
  model?: string;
  skills: string[];
  verbose?: boolean;
  compact?: boolean;
}): Promise<void> {
  const renderer = new EventRenderer({ verbose: opts.verbose ?? false, compact: opts.compact ?? false });
  // Chat streams straight from the provider, so the runtime instance returned
  // by the factory is not needed here.
  const { eventBus, provider } = await createRuntime(opts);

  eventBus.on((event) => renderer.render(event));

  const rl = createInterface({ input: process.stdin, output: process.stdout });

  // Track stream closure so a pending or future prompt resolves with `null`
  // instead of hanging forever (or throwing on a closed interface).
  let inputClosed = false;
  let pendingResolve: ((value: string | null) => void) | undefined;
  rl.once('close', () => {
    inputClosed = true;
    pendingResolve?.(null);
  });

  const prompt = (): Promise<string | null> =>
    new Promise<string | null>((resolve) => {
      if (inputClosed) {
        resolve(null);
        return;
      }
      pendingResolve = resolve;
      rl.question('\x1b[36m❯\x1b[0m ', (answer) => {
        pendingResolve = undefined;
        resolve(answer);
      });
    });

  console.log(`\x1b[90mProvider: ${provider.label} | Type your message (Ctrl+C to exit)\x1b[0m\n`);

  const messages: Message[] = [{ role: 'system', content: 'You are a helpful AI assistant with access to tools.' }];

  // Resolved on first use and then cached, so the model list is fetched at most once.
  let modelId = opts.model;

  while (true) {
    const input = await prompt();
    if (input === null) {
      // Input ended while the prompt was showing; move off the prompt line.
      process.stdout.write('\n');
      break;
    }

    const trimmed = input.trim();
    if (!trimmed) continue;
    if (trimmed === '/quit' || trimmed === '/exit') break;

    const userMessage: Message = { role: 'user', content: input };
    messages.push(userMessage);

    try {
      if (modelId === undefined) {
        const [firstModel] = await provider.listModels();
        if (firstModel === undefined) {
          throw new Error(`Provider "${provider.id}" reported no models; specify a model explicitly.`);
        }
        modelId = firstModel.id;
      }
      const model = modelId;

      // Report the model that is actually used rather than a 'default' placeholder.
      eventBus.emit({ type: 'model.request.start', provider: provider.id, model, timestamp: now() });
      const startMs = Date.now();

      for await (const event of provider.stream({ model, messages })) {
        if (event.type === 'text-delta') {
          process.stdout.write(event.text);
        } else if (event.type === 'finish') {
          const durationMs = Date.now() - startMs;
          eventBus.emit({ type: 'model.request.end', provider: provider.id, model, usage: event.result.usage, durationMs, timestamp: now() });
          messages.push({ role: 'assistant', content: event.result.content });
          if (!event.result.content.endsWith('\n')) process.stdout.write('\n');
        }
      }
    } catch (err) {
      const errMsg = err instanceof Error ? err.message : String(err);
      console.error(`\x1b[31mError: ${errMsg}\x1b[0m`);
      // Drop the unanswered user turn so the next request does not carry two
      // consecutive user messages.
      if (messages[messages.length - 1] === userMessage) messages.pop();
    }
    console.log();
  }

  rl.close();
  console.log('\x1b[90mGoodbye.\x1b[0m');
  process.exit(0);
}
|
src/cli/commands/config.ts
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// ─── Config Command ──────────────────────────────────────────────────────────
|
| 2 |
+
|
| 3 |
+
/**
 * Prints the effective configuration as pretty-printed JSON.
 *
 * Values backed by `HARNESS_*` environment variables fall back to descriptive
 * defaults when the variable is unset; the remaining entries are fixed.
 *
 * @param opts Command options (currently not consulted).
 */
export async function configCommand(opts: { show?: boolean }): Promise<void> {
  // Reads an environment variable, substituting `fallback` when it is unset.
  const fromEnv = (name: string, fallback: string): string => process.env[name] ?? fallback;

  const budget = {
    maxTokens: fromEnv('HARNESS_BUDGET_TOKENS', 'unlimited'),
    maxCostUsd: fromEnv('HARNESS_BUDGET_COST', 'unlimited'),
  };
  const tools = { filesystem: true, shell: true, webFetch: true };
  const observability = { level: 'standard', saveTraces: true };

  // Key order is significant: it determines the order of the printed JSON.
  const settings = {
    provider: fromEnv('HARNESS_PROVIDER', 'anthropic'),
    model: fromEnv('HARNESS_MODEL', 'auto (first from provider)'),
    approvalMode: fromEnv('HARNESS_APPROVAL', 'confirm-writes'),
    budget,
    skills: ['coding', 'research', 'docs'],
    tools,
    observability,
  };

  const sections = [
    '\n\x1b[1mCurrent Configuration:\x1b[0m\n',
    JSON.stringify(settings, null, 2),
    '\n\x1b[90mSet via environment variables (HARNESS_PROVIDER, HARNESS_MODEL, etc.) or --flags.\x1b[0m\n',
  ];
  for (const section of sections) {
    console.log(section);
  }
}
|
src/cli/commands/providers.ts
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// ─── Providers Command ──────────────────────────────────────────────────────
|
| 2 |
+
import { resolveProvider } from '../state/provider-resolver.js';
|
| 3 |
+
|
| 4 |
+
/**
 * Lists every known provider together with the models it reports.
 *
 * For each provider id the adapter is resolved and queried via `listModels()`.
 * Any failure while resolving or listing is reported as "not configured" and
 * the remaining providers are still processed (deliberate best-effort).
 */
export async function providersCommand(): Promise<void> {
  const providerIds = ['openai', 'anthropic', 'gemini', 'openrouter'];

  console.log('\n\x1b[1mAvailable Providers:\x1b[0m\n');

  for (const id of providerIds) {
    try {
      const provider = resolveProvider(id);
      const models = await provider.listModels();
      console.log(` \x1b[36m${provider.label}\x1b[0m (${id})`);
      for (const model of models) {
        // Compare against null/undefined rather than truthiness so that a
        // price of 0 (free model) is shown as a price, and require both values
        // so a missing output price is never rendered as "$undefined".
        const hasPricing = model.costPerMillionInput != null && model.costPerMillionOutput != null;
        const cost = hasPricing
          ? `$${model.costPerMillionInput}/M in, $${model.costPerMillionOutput}/M out`
          : 'pricing unknown';
        console.log(` • ${model.name} \x1b[90m(${model.id}) — ${(model.contextWindow / 1000).toFixed(0)}K ctx — ${cost}\x1b[0m`);
      }
      console.log();
    } catch {
      // Best-effort listing: a provider that cannot be resolved or queried is
      // shown as unconfigured instead of aborting the whole command.
      console.log(` \x1b[33m${id}\x1b[0m — not configured\n`);
    }
  }
}
|
src/cli/commands/run.ts
ADDED
|
@@ -0,0 +1,96 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// ─── Run Command ────────────────────────────────────────────────────────────
|
| 2 |
+
import { createRuntime } from '../state/factory.js';
|
| 3 |
+
import { EventRenderer, renderMetrics } from '../renderers/index.js';
|
| 4 |
+
import { Runtime, type RuntimeConfig } from '../../core/runtime/index.js';
|
| 5 |
+
import { EventBus } from '../../core/events/index.js';
|
| 6 |
+
import { ToolRegistry } from '../../core/tools/index.js';
|
| 7 |
+
import { SkillRegistry } from '../../core/skills/index.js';
|
| 8 |
+
import { PolicyEngine } from '../../core/policy/index.js';
|
| 9 |
+
import { MetricsCollector } from '../../core/observability/index.js';
|
| 10 |
+
import { ArtifactStore } from '../../core/artifacts/index.js';
|
| 11 |
+
import { Evaluator, outputNotEmptyCheck, hasArtifactsCheck } from '../../core/evaluators/index.js';
|
| 12 |
+
import { readFileTool, writeFileTool, listDirTool } from '../../tools/fs/index.js';
|
| 13 |
+
import { shellExecTool } from '../../tools/shell/index.js';
|
| 14 |
+
import { webFetchTool } from '../../tools/web/index.js';
|
| 15 |
+
import { codingSkill } from '../../skills/coding/index.js';
|
| 16 |
+
import { researchSkill } from '../../skills/research/index.js';
|
| 17 |
+
import { docsSkill } from '../../skills/docs/index.js';
|
| 18 |
+
import { resolveProvider } from '../state/provider-resolver.js';
|
| 19 |
+
|
| 20 |
+
/**
 * Executes a goal autonomously: wires up provider, tools, skills, policy,
 * metrics, artifacts and evaluator, runs the `Runtime`, prints a metrics
 * summary and exits the process (0 when the run status is `'completed'`,
 * 1 otherwise).
 *
 * @param goal              Natural-language task handed to the runtime.
 * @param opts.provider     Provider id passed to `resolveProvider`.
 * @param opts.model        Model id; defaults to the provider's first listed model.
 * @param opts.skills       Skill ids to activate.
 * @param opts.maxTurns     Turn limit as a decimal string (default 20).
 * @param opts.budgetTokens Token budget as a decimal string (default: none).
 * @param opts.budgetCost   Cost budget in USD as a decimal string (default: none).
 * @param opts.approval     Policy mode (default `'confirm-writes'`).
 * @param opts.verbose      Renderer verbosity flag (defaults to `false`).
 * @param opts.compact      Renderer compact flag (defaults to `false`).
 * @throws Error when a numeric option is not a number, or when no model is
 *         given and the provider lists none.
 */
export async function runCommand(goal: string, opts: {
  provider: string;
  model?: string;
  skills: string[];
  maxTurns?: string;
  budgetTokens?: string;
  budgetCost?: string;
  approval?: string;
  verbose?: boolean;
  compact?: boolean;
}): Promise<void> {
  // Parses an optional numeric option. Empty/absent values yield `undefined`;
  // a non-numeric value is rejected, because NaN would silently defeat every
  // `value > limit` comparison and thereby disable the limit.
  const parseLimit = (
    label: string,
    raw: string | undefined,
    parse: (text: string) => number,
  ): number | undefined => {
    if (!raw) return undefined;
    const value = parse(raw);
    if (Number.isNaN(value)) {
      throw new Error(`Invalid value for ${label}: "${raw}" (expected a number)`);
    }
    return value;
  };
  const parseDecimalInt = (text: string): number => Number.parseInt(text, 10);

  // Validate numeric options up front so bad input fails before any provider call.
  const maxTurns = parseLimit('maxTurns', opts.maxTurns, parseDecimalInt) ?? 20;
  const budgetTokens = parseLimit('budgetTokens', opts.budgetTokens, parseDecimalInt);
  const budgetCostUsd = parseLimit('budgetCost', opts.budgetCost, Number.parseFloat);

  const renderer = new EventRenderer({ verbose: opts.verbose ?? false, compact: opts.compact ?? false });

  // Setup
  const eventBus = new EventBus();
  eventBus.on((event) => renderer.render(event));

  const provider = resolveProvider(opts.provider);
  let model = opts.model;
  if (model === undefined) {
    const [firstModel] = await provider.listModels();
    if (firstModel === undefined) {
      throw new Error(`Provider "${opts.provider}" reported no models; specify a model explicitly.`);
    }
    model = firstModel.id;
  }

  // Tools
  const tools = new ToolRegistry();
  tools.register(readFileTool);
  tools.register(writeFileTool);
  tools.register(listDirTool);
  tools.register(shellExecTool);
  tools.register(webFetchTool);

  // Skills
  const skills = new SkillRegistry();
  skills.register(codingSkill);
  skills.register(researchSkill);
  skills.register(docsSkill);

  // Policy
  const policy = new PolicyEngine({
    // The CLI delivers the mode as a free-form string; it is forwarded
    // unchecked, exactly as before. NOTE(review): consider validating it
    // against the modes PolicyEngine actually supports.
    mode: (opts.approval as any) ?? 'confirm-writes',
  });

  // Metrics
  const metrics = new MetricsCollector();

  // Artifacts
  const artifacts = new ArtifactStore();

  // Evaluator
  const evaluator = new Evaluator();
  evaluator.addCheck(outputNotEmptyCheck);

  // Runtime config
  const config: RuntimeConfig = {
    provider,
    model,
    tools,
    skills,
    policy,
    metrics,
    artifacts,
    evaluator,
    eventBus,
    systemPrompt: `You are an AI agent executing tasks autonomously. You have tools available. Complete the goal thoroughly, verify your work, and report results.`,
    activeSkills: opts.skills,
    maxTurns,
    budgetTokens,
    budgetCostUsd,
  };

  // Execute
  const runtime = new Runtime(config, goal);
  const state = await runtime.run();

  // Summary
  const summary = metrics.summarize(state.id);
  renderMetrics(summary);

  process.exit(state.status === 'completed' ? 0 : 1);
}
|
src/cli/commands/skills.ts
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// ─── Skills Command ──────────────────────────────────────────────────────────
|
| 2 |
+
import { codingSkill } from '../../skills/coding/index.js';
|
| 3 |
+
import { researchSkill } from '../../skills/research/index.js';
|
| 4 |
+
import { docsSkill } from '../../skills/docs/index.js';
|
| 5 |
+
|
| 6 |
+
/**
 * Prints the built-in skills: id, title, description and — when a skill
 * declares any — its suggested tool names.
 */
export async function skillsCommand(): Promise<void> {
  console.log('\n\x1b[1mAvailable Skills:\x1b[0m\n');

  [codingSkill, researchSkill, docsSkill].forEach((entry) => {
    const heading = ` \x1b[35m${entry.id}\x1b[0m — ${entry.title}`;
    console.log(heading);
    console.log(` ${entry.description}`);

    // The tools line is printed only for a non-empty suggestion list.
    const suggested = entry.suggestedTools;
    if (suggested && suggested.length > 0) {
      console.log(` \x1b[90mTools: ${suggested.join(', ')}\x1b[0m`);
    }

    console.log();
  });
}
|
src/cli/commands/tools.ts
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// ─── Tools Command ──────────────────────────────────────────────────────────
|
| 2 |
+
import { ToolRegistry } from '../../core/tools/index.js';
|
| 3 |
+
import { readFileTool, writeFileTool, listDirTool } from '../../tools/fs/index.js';
|
| 4 |
+
import { shellExecTool } from '../../tools/shell/index.js';
|
| 5 |
+
import { webFetchTool } from '../../tools/web/index.js';
|
| 6 |
+
|
| 7 |
+
/**
 * `tools` sub-command: registers the built-in tools in a throwaway registry
 * and prints each one with its permission level and execution settings.
 */
export async function toolsCommand(): Promise<void> {
  // SGR colour code used for the permission badge.
  const colorFor = (permission: string): string => {
    switch (permission) {
      case 'read':
        return '32';
      case 'write':
        return '33';
      case 'network':
        return '35';
      default:
        // 'exec' and any unrecognised permission are both shown in red.
        return '31';
    }
  };

  const registry = new ToolRegistry();
  registry.register(readFileTool);
  registry.register(writeFileTool);
  registry.register(listDirTool);
  registry.register(shellExecTool);
  registry.register(webFetchTool);

  console.log('\n\x1b[1mAvailable Tools:\x1b[0m\n');

  for (const tool of registry.list()) {
    const permColor = colorFor(tool.permission);
    const icon = tool.renderer?.icon ?? '🔧';
    console.log(` ${icon} \x1b[1m${tool.name}\x1b[0m \x1b[${permColor}m[${tool.permission}]\x1b[0m`);
    console.log(` ${tool.description}`);
    console.log(` \x1b[90mtimeout: ${tool.timeout}ms | retries: ${tool.retries} | side-effect: ${tool.sideEffect}\x1b[0m`);
    console.log();
  }
}
|
src/cli/index.ts
ADDED
|
@@ -0,0 +1,65 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// ─── CLI Entry Point ────────────────────────────────────────────────────────
|
| 2 |
+
import { Command } from 'commander';
|
| 3 |
+
import { chatCommand } from './commands/chat.js';
|
| 4 |
+
import { runCommand } from './commands/run.js';
|
| 5 |
+
import { providersCommand } from './commands/providers.js';
|
| 6 |
+
import { toolsCommand } from './commands/tools.js';
|
| 7 |
+
import { skillsCommand } from './commands/skills.js';
|
| 8 |
+
import { configCommand } from './commands/config.js';
|
| 9 |
+
import { renderBanner } from './renderers/index.js';
|
| 10 |
+
|
| 11 |
+
// Root command; every sub-command below is attached to this instance.
const program = new Command();

program
  .name('harness')
  .description('Production-grade, model-agnostic AI agent CLI')
  .version('0.1.0');

// Interactive chat session.
program
  .command('chat')
  .description('Interactive chat with the AI agent')
  .option('-p, --provider <provider>', 'Provider to use', 'anthropic')
  .option('-m, --model <model>', 'Model to use')
  .option('-s, --skills <skills...>', 'Skills to load', ['coding'])
  .option('--verbose', 'Show detailed event information')
  .option('--compact', 'Minimal output mode')
  .action(chatCommand);

// Autonomous run towards a single goal. Numeric options arrive as strings
// (e.g. '20') and are parsed by the command handler.
program
  .command('run <goal>')
  .description('Run an autonomous task with the given goal')
  .option('-p, --provider <provider>', 'Provider to use', 'anthropic')
  .option('-m, --model <model>', 'Model to use')
  .option('-s, --skills <skills...>', 'Skills to load', ['coding'])
  .option('--max-turns <n>', 'Maximum turns', '20')
  .option('--budget-tokens <n>', 'Token budget')
  .option('--budget-cost <n>', 'Cost budget in USD')
  .option('--approval <mode>', 'Approval mode', 'confirm-writes')
  .option('--verbose', 'Show detailed event information')
  .option('--compact', 'Minimal output mode')
  .action(runCommand);

program
  .command('providers')
  .description('List available providers and models')
  .action(providersCommand);

program
  .command('tools')
  .description('List available tools')
  .action(toolsCommand);

program
  .command('skills')
  .description('List available skills')
  .action(skillsCommand);

program
  .command('config')
  .description('Show or edit configuration')
  .option('--show', 'Show current config')
  .action(configCommand);

// Show banner and parse.
renderBanner();

// The action handlers are async functions, so the promise they return must be
// awaited: `parse()` would drop it and a failing command would surface only as
// an unhandled rejection. `parseAsync()` lets the failure be reported here.
program.parseAsync().catch((err: unknown) => {
  console.error(err instanceof Error ? err.message : String(err));
  process.exitCode = 1;
});
|
src/cli/renderers/index.ts
ADDED
|
@@ -0,0 +1,208 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// ─── CLI Renderer ───────────────────────────────────────────────────────────
|
| 2 |
+
// Beautiful terminal output: streaming, panels, events, spinners, markdown.
|
| 3 |
+
|
| 4 |
+
import type { HarnessEvent } from '../../core/events/index.js';
|
| 5 |
+
|
| 6 |
+
// ─── ANSI color helpers (no dependency needed for basic colors) ──────────────
|
| 7 |
+
/** Builds an SGR escape sequence from its parameter string, e.g. `'1'` → bold. */
const esc = (code: string) => `\x1b[${code}m`;
const reset = esc('0');
// Each helper wraps `s` in one SGR code followed by a full reset.
const bold = (s: string) => `${esc('1')}${s}${reset}`;
const dim = (s: string) => `${esc('2')}${s}${reset}`;
const green = (s: string) => `${esc('32')}${s}${reset}`;
const yellow = (s: string) => `${esc('33')}${s}${reset}`;
const blue = (s: string) => `${esc('34')}${s}${reset}`;
const magenta = (s: string) => `${esc('35')}${s}${reset}`;
const cyan = (s: string) => `${esc('36')}${s}${reset}`;
const red = (s: string) => `${esc('31')}${s}${reset}`;
const gray = (s: string) => `${esc('90')}${s}${reset}`;

// ─── Box Drawing ────────────────────────────────────────────────────────────
const BOX = { tl: '╭', tr: '╮', bl: '╰', br: '╯', h: '─', v: '│' };

/** One SGR sequence; sticky so it only matches at the offset being examined. */
const SGR_AT_OFFSET = /\x1b\[[0-9;]*m/y;

/**
 * Cuts `s` to at most `max` visible characters. SGR escape sequences are
 * copied through without being counted, so a sequence is never split and
 * coloured text keeps its full visible width. When visible text is dropped
 * from a string that contained escapes, a reset is appended so the colour
 * cannot leak into the padding that follows.
 */
function truncateVisible(s: string, max: number): string {
  let out = '';
  let visible = 0;
  let sawEscape = false;
  let i = 0;
  while (i < s.length) {
    SGR_AT_OFFSET.lastIndex = i;
    const match = SGR_AT_OFFSET.exec(s);
    if (match) {
      out += match[0];
      sawEscape = true;
      i += match[0].length;
      continue;
    }
    if (visible >= max) {
      return sawEscape ? out + reset : out;
    }
    out += s.charAt(i);
    visible += 1;
    i += 1;
  }
  return out;
}

/**
 * Renders `content` inside a rounded box with `title` embedded in the top
 * border.
 *
 * @param title   Text shown in the top border.
 * @param content Body text; split on `\n`, one box row per line. Lines longer
 *                than the inner width are truncated (by visible characters).
 * @param color   Applied to the border glyphs only. Defaults to `cyan`.
 * @param width   Total width of the box in characters. Defaults to 72.
 * @returns The box as a single string with rows joined by `\n`.
 */
function box(title: string, content: string, color: (s: string) => string = cyan, width = 72): string {
  const innerW = Math.max(0, width - 4);
  const titleStr = ` ${title} `;
  // Body and bottom rows are innerW + 4 wide. The top row is
  // corner + one dash + title + filler + corner, so the filler must be
  // innerW + 1 - title length for all three kinds of row to line up.
  const topLen = Math.max(0, innerW + 1 - stripAnsi(titleStr).length);
  const top = color(`${BOX.tl}${BOX.h}${titleStr}${BOX.h.repeat(topLen)}${BOX.tr}`);
  const bot = color(`${BOX.bl}${BOX.h.repeat(innerW + 2)}${BOX.br}`);
  const lines = content.split('\n').map((l) => {
    const fitted = truncateVisible(l, innerW);
    const pad = ' '.repeat(Math.max(0, innerW - stripAnsi(fitted).length));
    return `${color(BOX.v)} ${fitted}${pad} ${color(BOX.v)}`;
  });
  return [top, ...lines, bot].join('\n');
}

/** Removes every SGR (`ESC [ … m`) sequence from `s`. */
function stripAnsi(s: string): string {
  return s.replace(/\x1b\[[0-9;]*m/g, '');
}
|
| 38 |
+
|
| 39 |
+
// ─── Spinner ────────────────────────────────────────────────────────────────
|
| 40 |
+
const SPINNER_FRAMES = ['⠋', '⠙', '⠹', '⠸', '⠼', '⠴', '⠦', '⠧', '⠇', '⠏'];

/**
 * Single-line terminal spinner drawn on stdout with carriage returns.
 *
 * `start()` begins repainting every 80 ms, `update()` changes the text shown
 * on the next repaint, and `stop()` erases the line and optionally prints a
 * final message.
 */
export class Spinner {
  private frame = 0;
  private interval: ReturnType<typeof setInterval> | null = null;
  private message = '';
  /** Widest line painted since the last start; used to erase it completely. */
  private paintedWidth = 0;

  /** Starts (or restarts) the animation with `message`. */
  start(message: string): void {
    // A second start() without stop() would otherwise orphan the previous
    // timer, which keeps repainting and keeps the process alive.
    if (this.interval) clearInterval(this.interval);
    this.message = message;
    this.paintedWidth = 0;
    this.interval = setInterval(() => {
      this.frame = (this.frame + 1) % SPINNER_FRAMES.length;
      // Frame glyph + space + message.
      this.paintedWidth = Math.max(this.paintedWidth, this.message.length + 2);
      process.stdout.write(`\r${cyan(SPINNER_FRAMES[this.frame]!)} ${this.message}`);
    }, 80);
  }

  /** Replaces the text shown next to the spinner; takes effect on the next frame. */
  update(message: string): void {
    this.message = message;
  }

  /**
   * Stops the animation and erases the spinner line.
   *
   * @param finalMessage Printed on its own line when provided, whether or not
   *                     the spinner was running.
   */
  stop(finalMessage?: string): void {
    if (this.interval) {
      clearInterval(this.interval);
      this.interval = null;
      // Only wipe when a spinner was on screen: an unconditional "\r + spaces"
      // would overwrite whatever other output is on the current line. The
      // width covers a longer message that was painted before an update().
      const wipe = Math.max(this.paintedWidth, this.message.length + 4);
      process.stdout.write(`\r${' '.repeat(wipe)}\r`);
    }
    if (finalMessage) console.log(finalMessage);
  }
}
|
| 66 |
+
|
| 67 |
+
// ─── Event Renderer ─────────────────────────────────────────────────────────
|
| 68 |
+
/** Display switches for {@link EventRenderer}. */
export interface RendererOptions {
  /** Print timing, token/cost and tool-input details. */
  verbose: boolean;
  /** Suppress plan panels. */
  compact: boolean;
}

/**
 * Turns harness events into terminal output: boxed panels for session, plan
 * and evaluation events, a spinner while a model request or tool call is in
 * flight, and raw pass-through of streamed model text.
 */
export class EventRenderer {
  private opts: RendererOptions;
  private spinner = new Spinner();
  /** Text streamed so far; used only to decide whether a trailing newline is needed. */
  private streamBuffer = '';

  /** @param opts Overrides for the defaults `{ verbose: false, compact: false }`. */
  constructor(opts: Partial<RendererOptions> = {}) {
    this.opts = { verbose: false, compact: false, ...opts };
  }

  /** Renders one event. Event types without a case below produce no output. */
  render(event: HarnessEvent): void {
    switch (event.type) {
      case 'session.started':
        console.log('\n' + box('Session', `${bold('Goal:')} ${event.goal}\n${gray(`ID: ${event.sessionId}`)}`, cyan));
        break;

      case 'session.completed':
        this.flushStream();
        console.log('\n' + green(`✓ ${bold('Session completed')} — ${event.summary}`));
        break;

      case 'session.failed':
        this.flushStream();
        console.log('\n' + red(`✗ ${bold('Session failed')} — ${event.error}`));
        break;

      case 'plan.updated':
        if (!this.opts.compact) {
          const planStr = event.items.map((item) => {
            const icon = item.status === 'completed' ? green('✓') : item.status === 'in_progress' ? yellow('▶') : item.status === 'failed' ? red('✗') : gray('○');
            return ` ${icon} ${item.title}`;
          }).join('\n');
          console.log('\n' + box('Plan', planStr, magenta));
        }
        break;

      case 'model.request.start':
        this.spinner.start(`${event.provider}/${event.model} thinking…`);
        break;

      case 'model.request.end':
        this.spinner.stop();
        if (this.opts.verbose) {
          console.log(gray(` ⏱ ${event.durationMs}ms | ${event.usage.totalTokens} tokens | ~$${(event.usage.estimatedCostUsd ?? 0).toFixed(4)}`));
        }
        break;

      case 'model.stream.delta':
        this.streamBuffer += event.text;
        process.stdout.write(event.text);
        break;

      case 'model.stream.end':
        this.flushStream();
        break;

      case 'tool.requested':
        console.log(`\n${blue('⚡')} ${bold(event.toolCall.toolName)} ${gray(`[${event.toolCall.id.slice(0, 8)}]`)}`);
        if (this.opts.verbose) {
          console.log(gray(` Input: ${JSON.stringify(event.toolCall.input).slice(0, 200)}`));
        }
        break;

      case 'tool.started':
        this.spinner.start('Tool executing…');
        break;

      case 'tool.progress':
        this.spinner.update(event.message);
        break;

      case 'tool.finished':
        this.spinner.stop(green(` ✓ Done`) + (this.opts.verbose ? gray(` (${event.durationMs}ms)`) : ''));
        break;

      case 'tool.failed':
        this.spinner.stop(red(` ✗ Failed: ${event.error}`));
        break;

      case 'tool.denied':
        console.log(yellow(` ⚠ Denied: ${event.reason}`));
        break;

      // Braced so the declarations are scoped to this case instead of leaking
      // into the whole switch.
      case 'evaluation.completed': {
        const r = event.report;
        const icon = r.passed ? green('✓') : red('✗');
        // `summary` is optional on the report; never print "undefined".
        const header = r.summary ? `${icon} ${r.summary}` : icon;
        const checkLines = r.checks.map((c) => ` ${c.passed ? green('✓') : red('✗')} ${c.name}${c.message ? gray(` — ${c.message}`) : ''}`);
        // Joining header and checks as one list avoids an empty row when
        // there are no checks.
        console.log('\n' + box('Evaluation', [header, ...checkLines].join('\n'), r.passed ? green : red));
        break;
      }

      case 'artifact.created':
        console.log(`${magenta('📎')} Artifact: ${bold(event.artifact.title)} ${gray(`(${event.artifact.type})`)}`);
        break;

      case 'budget.warning':
        console.log(yellow(`⚠ Budget warning: ${event.usage.totalTokens} tokens used`));
        break;

      case 'error':
        console.log(red(`✗ Error: ${event.message}`));
        break;

      default:
        // Remaining event types are intentionally not rendered.
        break;
    }
  }

  /** Ends the current streamed passage, adding a newline if it did not end with one. */
  private flushStream(): void {
    if (this.streamBuffer) {
      // Ensure newline after streamed content
      if (!this.streamBuffer.endsWith('\n')) process.stdout.write('\n');
      this.streamBuffer = '';
    }
  }
}
|
| 184 |
+
|
| 185 |
+
// ─── Header / Banner ────────────────────────────────────────────────────────
|
| 186 |
+
/** Prints the start-up banner, preceded and followed by a blank line. */
export function renderBanner(): void {
  const frame = (glyphs: string) => cyan(bold(glyphs));
  const side = frame('│');

  const rows = [
    frame('╭─────────────────────────────────────╮'),
    `${side} ${bold('⚡ AI Harness')} ${gray('v0.1.0')} ${side}`,
    `${side} ${dim('model-agnostic CLI agent runtime')} ${side}`,
    frame('╰─────────────────────────────────────╯'),
  ];

  console.log(`\n${rows.join('\n')}\n`);
}
|
| 194 |
+
|
| 195 |
+
// ─── Metrics Summary ────────────────────────────────────────────────────────
|
| 196 |
+
/**
 * Prints a boxed summary of a finished session.
 *
 * @param metrics Session totals. `toolSuccessRate` is a 0–1 ratio shown as a
 *                whole percentage; `totalDurationMs` is shown in seconds with
 *                one decimal.
 */
export function renderMetrics(metrics: {
  modelCalls: number; toolCalls: number; totalTokens: number;
  estimatedCostUsd: number; totalDurationMs: number; toolSuccessRate: number;
}): void {
  const successPct = Math.round(metrics.toolSuccessRate * 100);
  const seconds = (metrics.totalDurationMs / 1000).toFixed(1);

  // [label, value] pairs, rendered one per row in this order.
  const rows: Array<[string, string]> = [
    ['Model calls:', `${metrics.modelCalls}`],
    ['Tool calls:', `${metrics.toolCalls} (${successPct}% success)`],
    ['Total tokens:', metrics.totalTokens.toLocaleString()],
    ['Est. cost:', `$${metrics.estimatedCostUsd.toFixed(4)}`],
    ['Duration:', `${seconds}s`],
  ];

  const content = rows.map(([label, value]) => `${bold(label)} ${value}`).join('\n');
  console.log('\n' + box('Metrics', content, gray));
}
|
src/cli/state/factory.ts
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// ─── Runtime Factory ─────────────────────────────────────────────────────────
|
| 2 |
+
import { EventBus } from '../../core/events/index.js';
|
| 3 |
+
import type { ProviderAdapter } from '../../core/provider/index.js';
|
| 4 |
+
import { resolveProvider } from './provider-resolver.js';
|
| 5 |
+
|
| 6 |
+
/** Options accepted by {@link createRuntime}. */
export interface CreateRuntimeOpts {
  /** Provider id passed to `resolveProvider`. */
  provider: string;
  /** Model id; when omitted the provider's first listed model is used. */
  model?: string;
  skills: string[];
  verbose?: boolean;
  compact?: boolean;
}

/**
 * Builds the pieces chat mode needs: a fresh event bus, the resolved provider
 * and the model id to talk to.
 *
 * @returns `runtime` is always `null` here; `model` falls back to the first
 *          entry of `provider.listModels()`, or `'unknown'` if there is none.
 */
export async function createRuntime(opts: CreateRuntimeOpts) {
  const eventBus = new EventBus();
  const provider = resolveProvider(opts.provider);

  // Only query the provider when the caller did not pick a model.
  let model = opts.model;
  if (model == null) {
    const available = await provider.listModels();
    model = available[0]?.id ?? 'unknown';
  }

  return {
    runtime: null, // Chat mode doesn't use full runtime
    eventBus,
    provider,
    model,
  };
}
|
src/cli/state/provider-resolver.ts
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// ─── Provider Resolver ───────────────────────────────────────────────────────
|
| 2 |
+
import type { ProviderAdapter } from '../../core/provider/index.js';
|
| 3 |
+
import { OpenAIProvider } from '../../providers/openai/index.js';
|
| 4 |
+
import { AnthropicProvider } from '../../providers/anthropic/index.js';
|
| 5 |
+
import { GeminiProvider } from '../../providers/gemini/index.js';
|
| 6 |
+
import { OpenAICompatibleProvider, OpenRouterProvider } from '../../providers/openrouter/index.js';
|
| 7 |
+
|
| 8 |
+
/**
 * Maps a provider id to a provider adapter instance.
 *
 * The ids `openai`, `anthropic`, `gemini` and `openrouter` select their
 * dedicated adapters. Any other id is treated as an OpenAI-compatible
 * endpoint whose base URL and API key come from `config` or, failing that,
 * from the `<ID>_BASE_URL` / `<ID>_API_KEY` environment variables.
 *
 * @param id     Provider id.
 * @param config Optional explicit credentials / endpoint; takes precedence
 *               over environment variables.
 */
export function resolveProvider(id: string, config?: { apiKey?: string; baseUrl?: string }): ProviderAdapter {
  switch (id) {
    case 'openai':
      return new OpenAIProvider({ id: 'openai', ...config });
    case 'anthropic':
      return new AnthropicProvider({ id: 'anthropic', ...config });
    case 'gemini':
      return new GeminiProvider({ id: 'gemini', ...config });
    case 'openrouter':
      return new OpenRouterProvider({ ...config });
    default: {
      // Treat as OpenAI-compatible endpoint
      const legacyPrefix = id.toUpperCase();
      // Ids such as "my-llm" or "local.ai" upper-case to names that cannot be
      // exported from a POSIX shell, so a sanitised variant is also accepted.
      // The legacy spelling is consulted first so existing setups are unchanged.
      const safePrefix = legacyPrefix.replace(/[^A-Z0-9_]/g, '_');
      const fromEnv = (suffix: string): string | undefined =>
        process.env[`${legacyPrefix}_${suffix}`] ?? process.env[`${safePrefix}_${suffix}`];

      // NOTE(review): the `as any` cast predates this change; the constructor's
      // parameter type is not visible here, so it is left in place.
      return new OpenAICompatibleProvider({
        id,
        label: id,
        baseUrl: config?.baseUrl ?? fromEnv('BASE_URL'),
        apiKey: config?.apiKey ?? fromEnv('API_KEY'),
      } as any);
    }
  }
}
|
src/core/artifacts/index.ts
ADDED
|
@@ -0,0 +1,48 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// ─── Artifacts ──────────────────────────────────────────────────────────────
|
| 2 |
+
// First-class artifact persistence and retrieval.
|
| 3 |
+
|
| 4 |
+
import type { ArtifactRecord } from '../events/index.js';
|
| 5 |
+
|
| 6 |
+
/**
 * In-memory, insertion-ordered collection of artifact records with Markdown
 * and JSON export.
 */
export class ArtifactStore {
  private artifacts: ArtifactRecord[] = [];

  /** Appends `artifact` to the store. */
  add(artifact: ArtifactRecord): void {
    this.artifacts.push(artifact);
  }

  /** Returns the first artifact whose id equals `id`, or `undefined`. */
  get(id: string): ArtifactRecord | undefined {
    return this.artifacts.find((a) => a.id === id);
  }

  /** Returns a shallow copy of the stored artifacts in insertion order. */
  list(): ArtifactRecord[] {
    return [...this.artifacts];
  }

  /** Returns the artifacts whose `type` equals `type`. */
  listByType(type: ArtifactRecord['type']): ArtifactRecord[] {
    return this.artifacts.filter((a) => a.type === type);
  }

  /**
   * Renders all artifacts as a Markdown document. Content is included as a
   * fenced code block limited to its first 2000 characters.
   */
  exportMarkdown(): string {
    if (!this.artifacts.length) return '# Artifacts\n\nNo artifacts generated.\n';
    let md = '# Artifacts\n\n';
    for (const a of this.artifacts) {
      md += `## ${a.title}\n\n`;
      md += `- **Type:** ${a.type}\n`;
      md += `- **Created:** ${a.createdAt}\n`;
      if (a.path) md += `- **Path:** \`${a.path}\`\n`;
      if (a.content) {
        const excerpt = a.content.slice(0, 2000);
        // A fence is closed by any backtick run at least as long as the opener,
        // so content containing ``` would end the block early. Use a fence one
        // longer than the longest run in the excerpt (minimum three).
        const longestRun = (excerpt.match(/`+/g) ?? []).reduce(
          (longest, run) => Math.max(longest, run.length),
          0,
        );
        const fence = '`'.repeat(Math.max(3, longestRun + 1));
        md += `\n${fence}\n${excerpt}\n${fence}\n`;
      }
      md += '\n---\n\n';
    }
    return md;
  }

  /** Serialises all artifacts as pretty-printed JSON (2-space indent). */
  exportJson(): string {
    return JSON.stringify(this.artifacts, null, 2);
  }

  /** Removes every stored artifact. */
  clear(): void {
    this.artifacts = [];
  }
}
|
src/core/evaluators/index.ts
ADDED
|
@@ -0,0 +1,75 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// ─── Evaluators ─────────────────────────────────────────────────────────────
|
| 2 |
+
// Structured evaluation hooks: schema checks, tests, rubric scoring.
|
| 3 |
+
|
| 4 |
+
import type { EvaluationReport } from '../events/index.js';
|
| 5 |
+
|
| 6 |
+
/** A single named check that can be run against a session's outcome. */
export interface EvalCheck {
  name: string;
  /** Resolves to the verdict; a rejection is recorded by the evaluator as a failure. */
  run(context: EvalContext): Promise<{ passed: boolean; message?: string }>;
}

/** Data handed to every check. */
export interface EvalContext {
  sessionId: string;
  goal: string;
  artifacts: Array<{ path?: string; content?: string; type: string }>;
  assistantOutput: string;
  workDir: string;
}

/**
 * Runs registered checks one after another and folds their verdicts into a
 * single report.
 */
export class Evaluator {
  private checks: EvalCheck[] = [];

  /** Registers `check`; checks run in registration order. */
  addCheck(check: EvalCheck): void {
    this.checks.push(check);
  }

  /**
   * Runs every registered check against `ctx`.
   *
   * A check that throws is recorded as failed with a `Check threw: …`
   * message. `score` is the fraction of passing checks (1 when there are no
   * checks) and `passed` is true only when no check failed.
   */
  async evaluate(ctx: EvalContext): Promise<EvaluationReport> {
    const outcomes: Array<{ name: string; passed: boolean; message?: string }> = [];

    for (const check of this.checks) {
      try {
        const verdict = await check.run(ctx);
        outcomes.push({ name: check.name, ...verdict });
      } catch (err) {
        const reason = err instanceof Error ? err.message : String(err);
        outcomes.push({ name: check.name, passed: false, message: `Check threw: ${reason}` });
      }
    }

    const total = outcomes.length;
    const failedCount = outcomes.filter((o) => !o.passed).length;
    const passed = failedCount === 0;
    const score = total ? (total - failedCount) / total : 1;

    let summary: string;
    if (passed) {
      summary = `All ${total} checks passed.`;
    } else {
      summary = `${failedCount}/${total} checks failed.`;
    }

    return { passed, score, checks: outcomes, summary };
  }
}
|
| 55 |
+
|
| 56 |
+
// ─── Built-in Checks ────────────────────────────────────────────────────────
|
| 57 |
+
/** Passes when the assistant output contains anything other than whitespace. */
export const outputNotEmptyCheck: EvalCheck = {
  name: 'output-not-empty',
  async run(ctx) {
    const hasText = ctx.assistantOutput.trim().length > 0;
    if (hasText) {
      return { passed: true, message: undefined };
    }
    return { passed: false, message: 'Assistant output is empty.' };
  },
};
|
| 66 |
+
|
| 67 |
+
/** Passes when the session produced at least one artifact. */
export const hasArtifactsCheck: EvalCheck = {
  name: 'has-artifacts',
  async run(ctx) {
    const produced = ctx.artifacts.length > 0;
    if (produced) {
      return { passed: true, message: undefined };
    }
    return { passed: false, message: 'No artifacts were produced.' };
  },
};
|
src/core/events/index.ts
ADDED
|
@@ -0,0 +1,129 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// ─── Core Event Types ───────────────────────────────────────────────────────
|
| 2 |
+
// The internal event model is the backbone of the harness. Rendering, logging,
|
| 3 |
+
// replay, export, and debugging all consume the same event stream.
|
| 4 |
+
|
| 5 |
+
import { z } from 'zod';
|
| 6 |
+
|
| 7 |
+
// ─── Plan ───────────────────────────────────────────────────────────────────
|
| 8 |
+
/** Allowed values of a plan item's `status`. */
const PLAN_ITEM_STATUSES = ['pending', 'in_progress', 'completed', 'failed', 'skipped'] as const;

/** Schema for one entry of the agent's plan. */
export const PlanItemSchema = z.object({
  id: z.string(),
  title: z.string(),
  status: z.enum(PLAN_ITEM_STATUSES),
  detail: z.string().optional(),
});

/** Static type derived from {@link PlanItemSchema}. */
export type PlanItem = z.infer<typeof PlanItemSchema>;
|
| 15 |
+
|
| 16 |
+
// ─── Tool Call Record ───────────────────────────────────────────────────────
|
| 17 |
+
/** Allowed values of a tool call's `status`. */
const TOOL_CALL_STATUSES = [
  'queued',
  'awaiting-approval',
  'running',
  'streaming',
  'success',
  'failed',
  'denied',
  'cancelled',
] as const;

/**
 * Schema for a record of one tool invocation. Only `id`, `toolId`,
 * `toolName`, `input` and `status` are required; the remaining fields are
 * optional.
 */
export const ToolCallRecordSchema = z.object({
  id: z.string(),
  toolId: z.string(),
  toolName: z.string(),
  input: z.unknown(),
  status: z.enum(TOOL_CALL_STATUSES),
  output: z.unknown().optional(),
  error: z.string().optional(),
  durationMs: z.number().optional(),
  startedAt: z.string().optional(),
  finishedAt: z.string().optional(),
});

/** Static type derived from {@link ToolCallRecordSchema}. */
export type ToolCallRecord = z.infer<typeof ToolCallRecordSchema>;
|
| 39 |
+
|
| 40 |
+
// ─── Artifact Record ────────────────────────────────────────────────────────
|
| 41 |
+
/** Allowed values of an artifact's `type`. */
const ARTIFACT_TYPES = ['file', 'patch', 'log', 'screenshot', 'json', 'markdown', 'other'] as const;

/** Schema for an artifact record. `path`, `content` and `mimeType` are optional. */
export const ArtifactRecordSchema = z.object({
  id: z.string(),
  type: z.enum(ARTIFACT_TYPES),
  path: z.string().optional(),
  title: z.string(),
  content: z.string().optional(),
  mimeType: z.string().optional(),
  createdAt: z.string(),
});

/** Static type derived from {@link ArtifactRecordSchema}. */
export type ArtifactRecord = z.infer<typeof ArtifactRecordSchema>;
|
| 51 |
+
|
| 52 |
+
// ─── Evaluation Report ──────────────────────────────────────────────────────
|
| 53 |
+
/** Shape of one entry in an evaluation report's `checks` array. */
const EvaluationCheckResultSchema = z.object({
  name: z.string(),
  passed: z.boolean(),
  message: z.string().optional(),
});

/**
 * Schema for an evaluation report. `score`, when present, must lie within
 * [0, 1]; `summary` is optional.
 */
export const EvaluationReportSchema = z.object({
  passed: z.boolean(),
  score: z.number().min(0).max(1).optional(),
  checks: z.array(EvaluationCheckResultSchema),
  summary: z.string().optional(),
});

/** Static type derived from {@link EvaluationReportSchema}. */
export type EvaluationReport = z.infer<typeof EvaluationReportSchema>;
|
| 64 |
+
|
| 65 |
+
// ─── Token Usage ────────────────────────────────────────────────────────────
|
| 66 |
+
/** Schema for token accounting of a model call; the cost estimate is optional. */
export const TokenUsageSchema = z.object({
  promptTokens: z.number(),
  completionTokens: z.number(),
  totalTokens: z.number(),
  estimatedCostUsd: z.optional(z.number()),
});

/** Static type derived from {@link TokenUsageSchema}. */
export type TokenUsage = z.infer<typeof TokenUsageSchema>;
|
| 73 |
+
|
| 74 |
+
// ─── Harness Events (discriminated union) ───────────────────────────────────
|
| 75 |
+
/**
 * Every event the harness can emit, discriminated by `type`. All variants
 * carry a string `timestamp`.
 */
export type HarnessEvent =
  // Session lifecycle
  | { type: 'session.started'; sessionId: string; goal: string; timestamp: string }
  | { type: 'session.completed'; sessionId: string; summary: string; timestamp: string }
  | { type: 'session.failed'; sessionId: string; error: string; timestamp: string }
  // Planning
  | { type: 'plan.updated'; items: PlanItem[]; timestamp: string }
  // Model requests and streaming
  | { type: 'model.request.start'; provider: string; model: string; timestamp: string }
  | { type: 'model.request.end'; provider: string; model: string; usage: TokenUsage; durationMs: number; timestamp: string }
  | { type: 'model.stream.delta'; text: string; timestamp: string }
  | { type: 'model.stream.end'; fullText: string; timestamp: string }
  // Tool calls
  | { type: 'tool.requested'; toolCall: ToolCallRecord; timestamp: string }
  | { type: 'tool.approved'; toolCallId: string; timestamp: string }
  | { type: 'tool.denied'; toolCallId: string; reason: string; timestamp: string }
  | { type: 'tool.started'; toolCallId: string; timestamp: string }
  | { type: 'tool.progress'; toolCallId: string; message: string; timestamp: string }
  | { type: 'tool.finished'; toolCallId: string; result: unknown; durationMs: number; timestamp: string }
  | { type: 'tool.failed'; toolCallId: string; error: string; durationMs: number; timestamp: string }
  // Evaluation
  | { type: 'evaluation.started'; timestamp: string }
  | { type: 'evaluation.completed'; report: EvaluationReport; timestamp: string }
  // Artifacts, budget and errors
  | { type: 'artifact.created'; artifact: ArtifactRecord; timestamp: string }
  | { type: 'budget.warning'; usage: TokenUsage; limit: number; timestamp: string }
  | { type: 'error'; message: string; code?: string; timestamp: string };
|
| 96 |
+
|
| 97 |
+
// ─── Event Bus ──────────────────────────────────────────────────────────────
|
| 98 |
+
/** Callback invoked synchronously for every emitted event. */
export type EventListener = (event: HarnessEvent) => void;

/**
 * Synchronous publish/subscribe hub that also keeps every emitted event in an
 * in-memory history.
 */
export class EventBus {
  private listeners: EventListener[] = [];
  private history: HarnessEvent[] = [];

  /**
   * Subscribes `listener` to all future events.
   *
   * @returns A function that removes `listener` from the subscriber list.
   */
  on(listener: EventListener): () => void {
    this.listeners.push(listener);
    const unsubscribe = (): void => {
      this.listeners = this.listeners.filter((registered) => registered !== listener);
    };
    return unsubscribe;
  }

  /**
   * Records `event` in the history, then delivers it to each listener in
   * subscription order. An exception thrown by a listener is discarded so
   * that the remaining listeners still receive the event.
   */
  emit(event: HarnessEvent): void {
    this.history.push(event);
    for (const notify of this.listeners) {
      try {
        notify(event);
      } catch {
        /* swallow listener errors */
      }
    }
  }

  /** Returns the events emitted since construction or the last `clear()`. */
  getHistory(): readonly HarnessEvent[] {
    return this.history;
  }

  /** Empties the history; subscriptions are unaffected. */
  clear(): void {
    this.history = [];
  }
}
|
| 126 |
+
|
| 127 |
+
/** Returns the current time as an ISO-8601 string (`Date#toISOString`). */
export function now(): string {
  const current = new Date();
  return current.toISOString();
}
|
src/core/index.ts
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// ─── Core barrel export ─────────────────────────────────────────────────────
|
| 2 |
+
export * from './events/index.js';
|
| 3 |
+
export * from './provider/index.js';
|
| 4 |
+
export * from './tools/index.js';
|
| 5 |
+
export * from './skills/index.js';
|
| 6 |
+
export * from './policy/index.js';
|
| 7 |
+
export * from './observability/index.js';
|
| 8 |
+
export * from './artifacts/index.js';
|
| 9 |
+
export * from './evaluators/index.js';
|
| 10 |
+
export * from './runtime/index.js';
|
src/core/observability/index.ts
ADDED
|
@@ -0,0 +1,82 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// ─── Observability ──────────────────────────────────────────────────────────
|
| 2 |
+
// Captures metrics for model calls, tool calls, retries, token usage, and cost.
|
| 3 |
+
|
| 4 |
+
import type { TokenUsage } from '../events/index.js';
|
| 5 |
+
|
| 6 |
+
/** One recorded measurement: a model call, tool call, retry or error. */
export interface MetricEntry {
  timestamp: string;
  /** What kind of occurrence this entry measures. */
  type: 'model-call' | 'tool-call' | 'retry' | 'error';
  provider?: string;
  model?: string;
  toolName?: string;
  durationMs: number;
  /** Token accounting; summed by the collector for `model-call` entries. */
  usage?: TokenUsage;
  success: boolean;
  error?: string;
}
|
| 17 |
+
|
| 18 |
+
/** Aggregate view of one session's metrics, as produced by `MetricsCollector.summarize`. */
export interface SessionMetrics {
  /** Identifier of the session being summarized. */
  sessionId: string;
  /** ISO timestamp of when collection started. */
  startedAt: string;
  /** ISO timestamp of when the session ended, if known. */
  endedAt?: string;
  /** Milliseconds elapsed since collection started. */
  totalDurationMs: number;
  /** Number of `model-call` entries. */
  modelCalls: number;
  /** Number of `tool-call` entries. */
  toolCalls: number;
  /** Fraction of tool calls that succeeded (0..1). */
  toolSuccessRate: number;
  /** Number of `retry` entries. */
  retryCount: number;
  /** Sum of total tokens across model calls. */
  totalTokens: number;
  /** Sum of prompt tokens across model calls. */
  promptTokens: number;
  /** Sum of completion tokens across model calls. */
  completionTokens: number;
  /** Sum of the estimated cost reported by model calls. */
  estimatedCostUsd: number;
  /** Evaluation pass rate; not derived from metric entries. */
  evaluationPassRate: number;
}
|
| 33 |
+
|
| 34 |
+
/**
 * In-memory accumulator for metric entries belonging to one session.
 * Entries are kept in insertion order and summarized on demand.
 */
export class MetricsCollector {
  private entries: MetricEntry[] = [];
  private sessionStart: number = Date.now();

  /** Appends a metric entry. */
  record(entry: MetricEntry): void {
    this.entries.push(entry);
  }

  /** Returns the live (not copied) list of recorded entries, typed read-only. */
  getEntries(): readonly MetricEntry[] {
    return this.entries;
  }

  /**
   * Folds every recorded entry into a {@link SessionMetrics} summary.
   *
   * - Token and cost totals are taken only from `model-call` entries that carry `usage`.
   * - `toolSuccessRate` is 1 when no tool calls were recorded.
   * - `evaluationPassRate` is always 0 here; it is computed externally.
   * - `endedAt` is left unset.
   *
   * @param sessionId Identifier copied into the summary.
   */
  summarize(sessionId: string): SessionMetrics {
    let modelCallCount = 0;
    let toolCallCount = 0;
    let toolSuccessCount = 0;
    let retryCount = 0;
    let totalTokens = 0;
    let promptTokens = 0;
    let completionTokens = 0;
    let costUsd = 0;

    for (const entry of this.entries) {
      switch (entry.type) {
        case 'model-call': {
          modelCallCount += 1;
          const usage = entry.usage;
          if (usage) {
            totalTokens += usage.totalTokens;
            promptTokens += usage.promptTokens;
            completionTokens += usage.completionTokens;
            costUsd += usage.estimatedCostUsd ?? 0;
          }
          break;
        }
        case 'tool-call':
          toolCallCount += 1;
          if (entry.success) toolSuccessCount += 1;
          break;
        case 'retry':
          retryCount += 1;
          break;
        default:
          // 'error' entries do not contribute to any aggregate.
          break;
      }
    }

    const toolSuccessRate = toolCallCount ? toolSuccessCount / toolCallCount : 1;

    return {
      sessionId,
      startedAt: new Date(this.sessionStart).toISOString(),
      totalDurationMs: Date.now() - this.sessionStart,
      modelCalls: modelCallCount,
      toolCalls: toolCallCount,
      toolSuccessRate,
      retryCount,
      totalTokens,
      promptTokens,
      completionTokens,
      estimatedCostUsd: costUsd,
      evaluationPassRate: 0, // computed externally
    };
  }

  /** Discards all entries and restarts the session clock. */
  reset(): void {
    this.entries = [];
    this.sessionStart = Date.now();
  }
}
|
src/core/policy/index.ts
ADDED
|
@@ -0,0 +1,55 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// ─── Policy & Permissions ───────────────────────────────────────────────────
|
| 2 |
+
// Enforces confirmation rules for tool execution based on permission levels.
|
| 3 |
+
|
| 4 |
+
import type { PermissionLevel } from '../tools/index.js';
|
| 5 |
+
|
| 6 |
+
/**
 * How strictly tool execution is gated (see `PolicyEngine.requiresApproval`):
 *
 * - `auto`: only `dangerous` tools need approval.
 * - `confirm-writes`: `write`, `exec`, `network` and `dangerous` tools need approval.
 * - `confirm-network`: `network` and `dangerous` tools need approval.
 * - `manual-all`: every tool needs approval.
 * - `locked-down`: every tool needs approval.
 */
export type PolicyMode = 'auto' | 'confirm-writes' | 'confirm-network' | 'manual-all' | 'locked-down';
|
| 12 |
+
|
| 13 |
+
/** Settings consumed by `PolicyEngine`. */
export interface PolicyConfig {
  /** Approval strictness. */
  mode: PolicyMode;
  /** When set, any tool whose name is NOT in this list requires approval. */
  allowedTools?: string[];
  /** Tool names that are blocked outright. */
  blockedTools?: string[];
  /** Cost ceiling checked by `checkBudget`; unset or 0 means no ceiling. */
  maxCostUsd?: number;
  /** Token ceiling checked by `checkBudget`; unset or 0 means no ceiling. */
  maxTokens?: number;
}
|
| 20 |
+
|
| 21 |
+
/**
 * Decides whether a tool call may run unattended, must be approved, or is
 * blocked, and whether the session is still within its budget.
 */
export class PolicyEngine {
  constructor(private config: PolicyConfig) {}

  /**
   * Reports whether a tool call needs explicit approval before it runs.
   *
   * Blocked tools and tools missing from a configured allow-list always
   * require approval; otherwise the answer depends on the policy mode and the
   * tool's permission level.
   *
   * @param permission Permission level declared by the tool.
   * @param toolName   Name of the tool being invoked.
   * @returns `true` when approval is required.
   */
  requiresApproval(permission: PermissionLevel, toolName: string): boolean {
    const { blockedTools, allowedTools, mode } = this.config;

    if (blockedTools?.includes(toolName)) return true;
    if (allowedTools && !allowedTools.includes(toolName)) return true;

    switch (mode) {
      case 'auto':
        return permission === 'dangerous';
      case 'confirm-writes':
        return ['write', 'exec', 'network', 'dangerous'].includes(permission);
      case 'confirm-network':
        return ['network', 'dangerous'].includes(permission);
      case 'manual-all':
      case 'locked-down':
        return true;
      default:
        // Fail closed: a mode value outside the PolicyMode union (e.g. from an
        // unvalidated config file) must never fall through to `undefined`,
        // which callers would read as "no approval needed".
        return true;
    }
  }

  /** Reports whether the named tool is on the configured block-list. */
  isBlocked(toolName: string): boolean {
    return this.config.blockedTools?.includes(toolName) ?? false;
  }

  /**
   * Compares current usage against the configured ceilings.
   *
   * A ceiling that is unset or 0 is treated as "no limit". The token ceiling
   * is checked before the cost ceiling.
   *
   * @param currentUsage Tokens and cost consumed so far.
   * @returns `{ ok: true }` while under budget, otherwise `{ ok: false, reason }`.
   */
  checkBudget(currentUsage: { tokens: number; costUsd: number }): { ok: boolean; reason?: string } {
    const { maxTokens, maxCostUsd } = this.config;

    if (maxTokens && currentUsage.tokens >= maxTokens) {
      return { ok: false, reason: `Token budget exhausted: ${currentUsage.tokens}/${maxTokens}` };
    }
    if (maxCostUsd && currentUsage.costUsd >= maxCostUsd) {
      return { ok: false, reason: `Cost budget exhausted: $${currentUsage.costUsd.toFixed(4)}/$${maxCostUsd}` };
    }
    return { ok: true };
  }
}
|
src/core/provider/index.ts
ADDED
|
@@ -0,0 +1,99 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// ─── Provider Adapter Interface ─────────────────────────────────────────────
|
| 2 |
+
// Normalizes differences between model vendors while exposing capabilities.
|
| 3 |
+
|
| 4 |
+
import { z } from 'zod';
|
| 5 |
+
import type { TokenUsage } from '../events/index.js';
|
| 6 |
+
|
| 7 |
+
// ─── Model Capabilities ─────────────────────────────────────────────────────
|
| 8 |
+
/** Feature flags a provider or model can advertise; queried via `ProviderAdapter.supports`. */
export type ModelCapability =
  'streaming' | 'tool-calling' | 'structured-output' | 'vision' | 'long-context' | 'json-mode' | 'reasoning';
|
| 16 |
+
|
| 17 |
+
/** Static description of one model offered by a provider. */
export interface ModelInfo {
  /** Model identifier passed to the provider when invoking. */
  id: string;
  /** Human-readable model name. */
  name: string;
  /** Id of the provider that serves this model. */
  provider: string;
  /** Context window size (presumably in tokens). */
  contextWindow: number;
  /** Upper bound on generated tokens, when known. */
  maxOutputTokens?: number;
  /** Capabilities this model advertises. */
  capabilities: ModelCapability[];
  /** Price per million input tokens, when known (currency presumably USD; confirm). */
  costPerMillionInput?: number;
  /** Price per million output tokens, when known (currency presumably USD; confirm). */
  costPerMillionOutput?: number;
}
|
| 27 |
+
|
| 28 |
+
// ─── Messages ───────────────────────────────────────────────────────────────
|
| 29 |
+
/** Who authored a conversation message. */
export type MessageRole =
  | 'system'
  | 'user'
  | 'assistant'
  | 'tool';
|
| 30 |
+
|
| 31 |
+
/** One entry in the conversation sent to a model. */
export interface Message {
  /** Author of the message. */
  role: MessageRole;
  /** Text content of the message. */
  content: string;
  /** Optional name attached to the message. */
  name?: string;
  /** For `tool` messages: id of the tool call this message answers. */
  toolCallId?: string;
  /** For `assistant` messages: tool calls the model asked for. */
  toolCalls?: ToolCallRequest[];
}
|
| 38 |
+
|
| 39 |
+
/** A model's request to invoke a tool. */
export interface ToolCallRequest {
  /** Identifier of this call, echoed back on the tool result message. */
  id: string;
  /** Name of the tool to invoke. */
  name: string;
  /** Tool arguments encoded as a JSON string. */
  arguments: string;
}
|
| 44 |
+
|
| 45 |
+
// ─── Invocation ─────────────────────────────────────────────────────────────
|
| 46 |
+
/** Everything a provider needs to perform one model call. */
export interface ModelInvocation {
  /** Model identifier. */
  model: string;
  /** Conversation so far, in order. */
  messages: Message[];
  /** Tools the model may call. */
  tools?: ToolDefinition[];
  /** Sampling temperature. */
  temperature?: number;
  /** Cap on generated tokens. */
  maxTokens?: number;
  /** Request JSON-mode output. */
  jsonMode?: boolean;
  /** Stop sequences. */
  stop?: string[];
  /** Abort signal for cancelling the call. */
  signal?: AbortSignal;
}
|
| 56 |
+
|
| 57 |
+
/** Tool description in the shape handed to a model. */
export interface ToolDefinition {
  /** Name the model uses to call the tool. */
  name: string;
  /** Natural-language description of the tool. */
  description: string;
  /** JSON Schema describing the tool's parameters. */
  parameters: Record<string, unknown>;
}
|
| 62 |
+
|
| 63 |
+
// ─── Result ─────────────────────────────────────────────────────────────────
|
| 64 |
+
/** Normalized outcome of one model call. */
export interface ModelResult {
  /** Text produced by the model. */
  content: string;
  /** Tool calls requested by the model, if any. */
  toolCalls?: ToolCallRequest[];
  /** Token usage for this call. */
  usage: TokenUsage;
  /** Why generation ended. */
  finishReason: 'stop' | 'tool_calls' | 'length' | 'error';
  /** Unmodified provider response, for debugging. */
  rawResponse?: unknown;
}
|
| 71 |
+
|
| 72 |
+
// ─── Streaming Events ───────────────────────────────────────────────────────
|
| 73 |
+
/** Text chunk emitted while streaming. */
type TextDeltaEvent = { type: 'text-delta'; text: string };
/** Marks the start of a streamed tool call. */
type ToolCallStartEvent = { type: 'tool-call-start'; id: string; name: string };
/** Fragment of a streamed tool call's argument string. */
type ToolCallDeltaEvent = { type: 'tool-call-delta'; id: string; argumentsDelta: string };
/** Marks the end of a streamed tool call. */
type ToolCallEndEvent = { type: 'tool-call-end'; id: string };
/** Final event carrying the complete result. */
type FinishEvent = { type: 'finish'; result: ModelResult };

/** Events yielded by `ProviderAdapter.stream`, discriminated by `type`. */
export type ModelStreamEvent =
  | TextDeltaEvent
  | ToolCallStartEvent
  | ToolCallDeltaEvent
  | ToolCallEndEvent
  | FinishEvent;
|
| 79 |
+
|
| 80 |
+
// ─── Provider Adapter ───────────────────────────────────────────────────────
|
| 81 |
+
/** Contract every model vendor integration implements. */
export interface ProviderAdapter {
  /** Stable provider identifier. */
  id: string;
  /** Human-readable provider name. */
  label: string;

  /** Lists the models this provider offers. */
  listModels(): Promise<ModelInfo[]>;

  /** Performs one model call and resolves with the complete result. */
  invoke(input: ModelInvocation): Promise<ModelResult>;

  /** Performs one model call and yields incremental events. */
  stream(input: ModelInvocation): AsyncIterable<ModelStreamEvent>;

  /** Reports whether a capability is supported, optionally for a specific model. */
  supports(capability: ModelCapability, model?: string): boolean;
}
|
| 89 |
+
|
| 90 |
+
// ─── Provider Config ────────────────────────────────────────────────────────
|
| 91 |
+
/** Field validators for a provider configuration; only `id` is mandatory. */
const providerConfigShape = {
  id: z.string(),
  apiKey: z.string().optional(),
  baseUrl: z.string().optional(),
  defaultModel: z.string().optional(),
  orgId: z.string().optional(),
  headers: z.record(z.string()).optional(),
};

/** Zod schema validating a provider configuration object. */
export const ProviderConfigSchema = z.object(providerConfigShape);

/** Provider configuration type inferred from {@link ProviderConfigSchema}. */
export type ProviderConfig = z.infer<typeof ProviderConfigSchema>;
|
src/core/runtime/index.ts
ADDED
|
@@ -0,0 +1,279 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// ─── Runtime ────────────────────────────────────────────────────────────────
|
| 2 |
+
// The heart of the harness: manages session state, orchestration, tool execution,
|
| 3 |
+
// budgets, retries, and structured planner/executor/evaluator roles.
|
| 4 |
+
|
| 5 |
+
import { nanoid } from 'nanoid';
|
| 6 |
+
import { EventBus, now, type HarnessEvent, type PlanItem, type TokenUsage, type ToolCallRecord } from '../events/index.js';
|
| 7 |
+
import type { ProviderAdapter, ModelInvocation, Message, ToolCallRequest, ModelResult } from '../provider/index.js';
|
| 8 |
+
import { ToolRegistry, type ToolDef, type ToolContext } from '../tools/index.js';
|
| 9 |
+
import { SkillRegistry } from '../skills/index.js';
|
| 10 |
+
import { PolicyEngine } from '../policy/index.js';
|
| 11 |
+
import { MetricsCollector } from '../observability/index.js';
|
| 12 |
+
import { ArtifactStore } from '../artifacts/index.js';
|
| 13 |
+
import { Evaluator, type EvalContext } from '../evaluators/index.js';
|
| 14 |
+
|
| 15 |
+
// ─── Session State ──────────────────────────────────────────────────────────
|
| 16 |
+
/** Mutable state of one runtime session. */
export interface SessionState {
  /** Session identifier. */
  id: string;
  /** The goal the session was started with. */
  goal: string;
  /** Plan items for the session. */
  plan: PlanItem[];
  /** Conversation history sent to the model. */
  messages: Message[];
  /** Ids of artifacts associated with the session. */
  artifacts: string[];
  /** Id of the provider in use. */
  provider: string;
  /** Model in use. */
  model: string;
  /** Ids of the active skills. */
  skills: string[];
  /** Tokens and cost consumed so far. */
  budgetUsed: { tokens: number; costUsd: number };
  /** Lifecycle status. */
  status: 'running' | 'completed' | 'failed' | 'paused';
  /** Number of model-call retries used so far. */
  retries: number;
  /** Maximum number of model-call retries allowed. */
  maxRetries: number;
  /** Timestamp of when the state was created. */
  createdAt: string;
}
|
| 31 |
+
|
| 32 |
+
/** Dependencies and limits handed to a `Runtime`. */
export interface RuntimeConfig {
  /** Provider adapter used for model calls. */
  provider: ProviderAdapter;
  /** Model identifier passed on every invocation. */
  model: string;
  /** Registry of tools the model may call. */
  tools: ToolRegistry;
  /** Registry used to build skill instructions. */
  skills: SkillRegistry;
  /** Policy engine consulted for budgets, blocks and approvals. */
  policy: PolicyEngine;
  /** Sink for model and tool metrics. */
  metrics: MetricsCollector;
  /** Artifact store read when building the evaluation context. */
  artifacts: ArtifactStore;
  /** Evaluator run after a session completes. */
  evaluator: Evaluator;
  /** Bus on which runtime events are emitted. */
  eventBus: EventBus;
  /** Base system prompt; skill instructions are appended to it. */
  systemPrompt: string;
  /** Ids of the skills to activate. */
  activeSkills: string[];
  /** Maximum model-call retries (defaults to 3). */
  maxRetries?: number;
  /** Maximum number of turns (defaults to 20). */
  maxTurns?: number;
  /** Token budget; reported as the limit on `budget.warning` events. */
  budgetTokens?: number;
  /** Cost budget. */
  budgetCostUsd?: number;
  /** Asked to approve a tool call when policy requires it; resolves to `true` to allow. */
  approvalHandler?: (toolCall: ToolCallRecord) => Promise<boolean>;
}
|
| 50 |
+
|
| 51 |
+
// ─── Runtime ────────────────────────────────────────────────────────────────
|
| 52 |
+
/**
 * Drives a single agent session: sends the conversation to the model, executes
 * the tool calls it requests (subject to policy), feeds results back, and
 * repeats until the model stops asking for tools, a limit is hit, or the
 * session is cancelled.
 */
export class Runtime {
  private state: SessionState;
  private config: RuntimeConfig;
  private abortController = new AbortController();
  private turn = 0;

  /**
   * @param config Dependencies and limits for the session.
   * @param goal   User goal; becomes the first user message.
   */
  constructor(config: RuntimeConfig, goal: string) {
    this.config = config;
    this.state = {
      id: nanoid(),
      goal,
      plan: [],
      messages: [],
      artifacts: [],
      provider: config.provider.id,
      model: config.model,
      skills: config.activeSkills,
      budgetUsed: { tokens: 0, costUsd: 0 },
      status: 'running',
      retries: 0,
      maxRetries: config.maxRetries ?? 3,
      createdAt: now(),
    };
  }

  get sessionId(): string { return this.state.id; }
  get status(): string { return this.state.status; }

  private emit(event: HarnessEvent): void {
    this.config.eventBus.emit(event);
  }

  /**
   * Runs the session loop to completion.
   *
   * The terminal event matches the final status: `session.failed` when the
   * session failed (model errors exhausted retries, budget exhausted, turn
   * limit reached, cancelled, or an unexpected exception) and
   * `session.completed` otherwise. Evaluation runs only for completed sessions.
   *
   * @returns The final session state (never rejects; failures are reflected in `status`).
   */
  async run(): Promise<SessionState> {
    this.emit({ type: 'session.started', sessionId: this.state.id, goal: this.state.goal, timestamp: now() });

    // Build system message
    const skillInstructions = this.config.skills.buildInstructions(this.config.activeSkills);
    const systemMsg: Message = {
      role: 'system',
      content: [this.config.systemPrompt, skillInstructions].filter(Boolean).join('\n\n---\n\n'),
    };
    this.state.messages = [systemMsg, { role: 'user', content: this.state.goal }];

    const maxTurns = this.config.maxTurns ?? 20;
    // Most recent reason the session could not proceed; reported on session.failed.
    let failureReason: string | undefined;

    try {
      while (this.state.status === 'running' && this.turn < maxTurns) {
        this.turn++;

        // Budget check
        const budgetCheck = this.config.policy.checkBudget(this.state.budgetUsed);
        if (!budgetCheck.ok) {
          this.emit({ type: 'budget.warning', usage: { promptTokens: 0, completionTokens: 0, totalTokens: this.state.budgetUsed.tokens, estimatedCostUsd: this.state.budgetUsed.costUsd }, limit: this.config.budgetTokens ?? 0, timestamp: now() });
          // The session cannot continue, so do not leave it marked as running.
          failureReason = budgetCheck.reason ?? 'Budget exhausted.';
          this.state.status = 'failed';
          break;
        }

        // Invoke model
        const toolDefs = this.config.tools.listForModel();
        const invocation: ModelInvocation = {
          model: this.config.model,
          messages: this.state.messages,
          tools: toolDefs,
          signal: this.abortController.signal,
        };

        this.emit({ type: 'model.request.start', provider: this.config.provider.id, model: this.config.model, timestamp: now() });
        const startMs = Date.now();

        let result: ModelResult;
        try {
          result = await this.config.provider.invoke(invocation);
        } catch (err) {
          const errMsg = err instanceof Error ? err.message : String(err);
          failureReason = `Model call failed: ${errMsg}`;
          this.emit({ type: 'error', message: failureReason, timestamp: now() });
          if (this.state.retries < this.state.maxRetries) {
            this.state.retries++;
            // The loop condition re-checks status, so a cancel() issued during
            // the call ends the loop instead of retrying.
            continue;
          }
          this.state.status = 'failed';
          break;
        }
        failureReason = undefined;

        const durationMs = Date.now() - startMs;
        this.state.budgetUsed.tokens += result.usage.totalTokens;
        this.state.budgetUsed.costUsd += result.usage.estimatedCostUsd ?? 0;

        this.emit({ type: 'model.request.end', provider: this.config.provider.id, model: this.config.model, usage: result.usage, durationMs, timestamp: now() });
        this.config.metrics.record({ timestamp: now(), type: 'model-call', provider: this.config.provider.id, model: this.config.model, durationMs, usage: result.usage, success: true });

        // Handle assistant response
        if (result.content) {
          this.emit({ type: 'model.stream.end', fullText: result.content, timestamp: now() });
        }

        this.state.messages.push({
          role: 'assistant',
          content: result.content,
          toolCalls: result.toolCalls,
        });

        // If no tool calls, we're done
        if (!result.toolCalls || result.toolCalls.length === 0) {
          this.state.status = 'completed';
          break;
        }

        // Execute tool calls sequentially, in the order the model requested them.
        for (const tc of result.toolCalls) {
          // Do not start further (possibly side-effecting) tools after cancel().
          if (this.abortController.signal.aborted) break;
          const toolResult = await this.executeTool(tc);
          this.state.messages.push({
            role: 'tool',
            content: typeof toolResult === 'string' ? toolResult : JSON.stringify(toolResult),
            toolCallId: tc.id,
          });
        }
      }

      // Still "running" here means the loop ended only because the turn limit was hit.
      if (this.state.status === 'running') {
        this.state.status = 'failed';
        failureReason = `Turn limit reached (${maxTurns}) before the model finished.`;
      }

      // Run evaluation
      if (this.state.status === 'completed') {
        this.emit({ type: 'evaluation.started', timestamp: now() });
        const evalCtx: EvalContext = {
          sessionId: this.state.id,
          goal: this.state.goal,
          artifacts: this.config.artifacts.list().map((a) => ({ path: a.path, content: a.content, type: a.type })),
          assistantOutput: this.state.messages.filter((m) => m.role === 'assistant').map((m) => m.content).join('\n'),
          workDir: process.cwd(),
        };
        const report = await this.config.evaluator.evaluate(evalCtx);
        this.emit({ type: 'evaluation.completed', report, timestamp: now() });
      }

      if (this.state.status === 'failed') {
        const fallback = this.abortController.signal.aborted ? 'Session cancelled.' : 'Session failed.';
        this.emit({ type: 'session.failed', sessionId: this.state.id, error: failureReason ?? fallback, timestamp: now() });
      } else {
        const verb = this.state.status === 'paused' ? 'Paused after' : 'Completed in';
        this.emit({ type: 'session.completed', sessionId: this.state.id, summary: `${verb} ${this.turn} turns.`, timestamp: now() });
      }
    } catch (err) {
      const errMsg = err instanceof Error ? err.message : String(err);
      this.state.status = 'failed';
      this.emit({ type: 'session.failed', sessionId: this.state.id, error: errMsg, timestamp: now() });
    }

    return this.state;
  }

  /**
   * Validates, authorizes and runs one tool call.
   *
   * Never throws for tool-level problems: unknown tools, invalid input, policy
   * denials, timeouts and tool errors are all returned as `{ error: string }`
   * so the model can see and react to them.
   *
   * @param tc Tool call requested by the model.
   * @returns The tool's result, or `{ error }` describing why it did not run or failed.
   */
  private async executeTool(tc: ToolCallRequest): Promise<unknown> {
    const tool = this.config.tools.getByName(tc.name);
    if (!tool) {
      const errMsg = `Unknown tool: ${tc.name}`;
      this.emit({ type: 'tool.failed', toolCallId: tc.id, error: errMsg, durationMs: 0, timestamp: now() });
      return { error: errMsg };
    }

    // Parse input
    let input: unknown;
    try {
      const raw = JSON.parse(tc.arguments);
      input = tool.inputSchema.parse(raw);
    } catch (err) {
      const errMsg = `Invalid tool input: ${err instanceof Error ? err.message : String(err)}`;
      this.emit({ type: 'tool.failed', toolCallId: tc.id, error: errMsg, durationMs: 0, timestamp: now() });
      return { error: errMsg };
    }

    // Policy check
    if (this.config.policy.isBlocked(tool.name)) {
      this.emit({ type: 'tool.denied', toolCallId: tc.id, reason: 'Tool is blocked by policy.', timestamp: now() });
      return { error: 'Tool blocked by policy.' };
    }

    const needsApproval = this.config.policy.requiresApproval(tool.permission, tool.name);
    const record: ToolCallRecord = {
      id: tc.id,
      toolId: tool.id,
      toolName: tool.name,
      input,
      status: needsApproval ? 'awaiting-approval' : 'queued',
    };
    this.emit({ type: 'tool.requested', toolCall: record, timestamp: now() });

    if (needsApproval) {
      const approvalHandler = this.config.approvalHandler;
      if (!approvalHandler) {
        // Fail closed: with nobody to ask, a call that requires approval must not run.
        this.emit({ type: 'tool.denied', toolCallId: tc.id, reason: 'Approval required but no approval handler is configured.', timestamp: now() });
        return { error: 'Tool call requires approval, but no approval handler is configured.' };
      }
      const approved = await approvalHandler(record);
      if (!approved) {
        this.emit({ type: 'tool.denied', toolCallId: tc.id, reason: 'User denied.', timestamp: now() });
        return { error: 'Tool call denied by user.' };
      }
      this.emit({ type: 'tool.approved', toolCallId: tc.id, timestamp: now() });
    }

    // Execute
    this.emit({ type: 'tool.started', toolCallId: tc.id, timestamp: now() });
    const startMs = Date.now();

    const ctx: ToolContext = {
      sessionId: this.state.id,
      workDir: process.cwd(),
      signal: this.abortController.signal,
      emit: (msg) => this.emit({ type: 'tool.progress', toolCallId: tc.id, message: msg, timestamp: now() }),
    };

    // One initial attempt plus up to `tool.retries` retries.
    for (let attempt = 0; attempt <= tool.retries; attempt++) {
      try {
        const result = await this.runWithTimeout(tool, input, ctx);
        const durationMs = Date.now() - startMs;
        this.emit({ type: 'tool.finished', toolCallId: tc.id, result, durationMs, timestamp: now() });
        this.config.metrics.record({ timestamp: now(), type: 'tool-call', toolName: tool.name, durationMs, success: true });
        return result;
      } catch (err) {
        // Give up when retries are used up, or when the session was cancelled.
        const giveUp = attempt >= tool.retries || this.abortController.signal.aborted;
        if (giveUp) {
          const durationMs = Date.now() - startMs;
          const errMsg = err instanceof Error ? err.message : String(err);
          this.emit({ type: 'tool.failed', toolCallId: tc.id, error: errMsg, durationMs, timestamp: now() });
          this.config.metrics.record({ timestamp: now(), type: 'tool-call', toolName: tool.name, durationMs, success: false, error: errMsg });
          return { error: errMsg };
        }
        this.config.metrics.record({ timestamp: now(), type: 'retry', toolName: tool.name, durationMs: 0, success: false });
      }
    }

    return { error: 'Unexpected tool execution path.' };
  }

  /**
   * Runs a tool once, rejecting with `Error('Tool timeout')` if it takes longer
   * than `tool.timeout` ms. The timer is always cleared afterwards so it cannot
   * keep the process alive once the tool has settled.
   */
  private async runWithTimeout(tool: ToolDef, input: unknown, ctx: ToolContext): Promise<unknown> {
    let timer: ReturnType<typeof setTimeout> | undefined;
    const timeout = new Promise<never>((_, reject) => {
      timer = setTimeout(() => reject(new Error('Tool timeout')), tool.timeout);
    });
    try {
      return await Promise.race([tool.execute(input, ctx), timeout]);
    } finally {
      clearTimeout(timer);
    }
  }

  /** Marks the session paused; the run loop stops before its next turn. */
  pause(): void { this.state.status = 'paused'; }
  /** Marks a paused session as running again (does not itself restart the loop). */
  resume(): void { if (this.state.status === 'paused') this.state.status = 'running'; }
  /** Aborts in-flight work via the shared signal and marks the session failed. */
  cancel(): void { this.abortController.abort(); this.state.status = 'failed'; }
  /** Returns the live session state, typed read-only. */
  getState(): Readonly<SessionState> { return this.state; }
}
|
src/core/skills/index.ts
ADDED
|
@@ -0,0 +1,39 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// ─── Skills System ──────────────────────────────────────────────────────────
|
| 2 |
+
// Modular instruction packs attachable per task or session.
|
| 3 |
+
|
| 4 |
+
/** A named pack of instructions that can be attached to a session. */
export interface SkillModule {
  /** Unique key used to register and look up the skill. */
  id: string;
  /** Heading used when the skill is rendered into instructions. */
  title: string;
  /** Short description of the skill. */
  description: string;
  /** Instruction text rendered under the skill's heading. */
  instructions: string;
  /** Names of tools the skill suggests using. */
  suggestedTools?: string[];
  /** Free-form tags. */
  tags?: string[];
}
|
| 12 |
+
|
| 13 |
+
/** Keeps skills keyed by id and renders them into prompt instructions. */
export class SkillRegistry {
  private skills = new Map<string, SkillModule>();

  /** Stores a skill under its id, replacing any skill already registered with that id. */
  register(skill: SkillModule): void {
    this.skills.set(skill.id, skill);
  }

  /** Looks up a skill by id. */
  get(id: string): SkillModule | undefined {
    return this.skills.get(id);
  }

  /** Returns all registered skills in registration order. */
  list(): SkillModule[] {
    return Array.from(this.skills.values());
  }

  /** Maps ids to skills in the given order; ids with no registered skill are dropped. */
  resolve(ids: string[]): SkillModule[] {
    const found: SkillModule[] = [];
    for (const id of ids) {
      const skill = this.get(id);
      if (skill) found.push(skill);
    }
    return found;
  }

  /**
   * Renders the resolved skills as markdown sections separated by `---` rules.
   * Returns the empty string when none of the ids resolve.
   */
  buildInstructions(ids: string[]): string {
    const sections = this.resolve(ids).map((skill) => `## Skill: ${skill.title}\n\n${skill.instructions}`);
    // Joining an empty list yields '', which covers the "no skills" case.
    return sections.join('\n\n---\n\n');
  }
}
|
src/core/tools/index.ts
ADDED
|
@@ -0,0 +1,113 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// ─── Tool Registry ──────────────────────────────────────────────────────────
|
| 2 |
+
// First-class subsystem for typed, permissioned, observable tool execution.
|
| 3 |
+
|
| 4 |
+
import { z, ZodSchema } from 'zod';
|
| 5 |
+
|
| 6 |
+
// ─── Permission Levels ──────────────────────────────────────────────────────
|
| 7 |
+
/** Permission a tool declares; the policy engine uses it to decide whether approval is needed. */
export type PermissionLevel =
  | 'read'
  | 'write'
  | 'exec'
  | 'network'
  | 'dangerous';
|
| 8 |
+
|
| 9 |
+
// ─── Tool Status ────────────────────────────────────────────────────────────
|
| 10 |
+
/** Lifecycle states a tool call can be in. */
export type ToolStatus =
  'queued' | 'awaiting-approval' | 'running' | 'streaming' | 'success' | 'failed' | 'denied' | 'cancelled';
|
| 19 |
+
|
| 20 |
+
// ─── Side Effect Classification ─────────────────────────────────────────────
|
| 21 |
+
/** Classification of what a tool may affect when it runs. */
export type SideEffect =
  | 'none'
  | 'filesystem'
  | 'network'
  | 'process'
  | 'mixed';
|
| 22 |
+
|
| 23 |
+
// ─── Tool Definition ────────────────────────────────────────────────────────
|
| 24 |
+
/**
 * A typed, permissioned tool.
 *
 * @typeParam TInput  Input type accepted by `execute` (validated by `inputSchema`).
 * @typeParam TOutput Output type produced by `execute`.
 */
export interface ToolDef<TInput = unknown, TOutput = unknown> {
  /** Unique registry key. */
  id: string;
  /** Name exposed to the model and used to look the tool up when it is called. */
  name: string;
  /** Description exposed to the model. */
  description: string;
  /** Zod schema for the tool's input. */
  inputSchema: ZodSchema<TInput>;
  /** Zod schema for the tool's output. */
  outputSchema: ZodSchema<TOutput>;
  /** Permission level the policy engine evaluates. */
  permission: PermissionLevel;
  /** What the tool may affect. */
  sideEffect: SideEffect;
  /** Execution time limit in milliseconds. */
  timeout: number;
  /** Number of additional attempts after a failed execution. */
  retries: number;
  /** Free-form tags. */
  tags?: string[];
  /** Display hints (presumably consumed by the CLI renderers; confirm). */
  renderer?: {
    icon?: string;
    color?: string;
    compact?: boolean;
  };
  /** Runs the tool with validated input. */
  execute(input: TInput, ctx: ToolContext): Promise<TOutput>;
}
|
| 42 |
+
|
| 43 |
+
// ─── Tool Context ───────────────────────────────────────────────────────────
|
| 44 |
+
/** Per-call context handed to a tool's `execute`. */
export interface ToolContext {
  /** Id of the session the call belongs to. */
  sessionId: string;
  /** Working directory for the call. */
  workDir: string;
  /** Abort signal the tool may observe to stop early. */
  signal?: AbortSignal;
  /** Reports a progress message for this call. */
  emit(message: string): void;
}
|
| 50 |
+
|
| 51 |
+
// ─── Tool Registry ──────────────────────────────────────────────────────────
|
| 52 |
+
/**
 * Holds tool definitions keyed by id and exposes them to the model as
 * name/description/JSON-Schema triples.
 */
export class ToolRegistry {
  private tools = new Map<string, ToolDef<any, any>>();

  /**
   * Adds a tool.
   * @throws Error if a tool with the same id is already registered.
   */
  register<TI, TO>(tool: ToolDef<TI, TO>): void {
    if (this.tools.has(tool.id)) {
      throw new Error(`Tool already registered: ${tool.id}`);
    }
    this.tools.set(tool.id, tool);
  }

  /** Looks a tool up by id. */
  get(id: string): ToolDef | undefined {
    return this.tools.get(id);
  }

  /** Returns the first registered tool with the given name, if any. */
  getByName(name: string): ToolDef | undefined {
    for (const tool of this.tools.values()) {
      if (tool.name === name) return tool;
    }
    return undefined;
  }

  /** Returns all tools in registration order. */
  list(): ToolDef[] {
    return [...this.tools.values()];
  }

  /** Returns every tool in the shape handed to a model, with parameters as JSON Schema. */
  listForModel(): Array<{ name: string; description: string; parameters: Record<string, unknown> }> {
    return this.list().map((t) => ({
      name: t.name,
      description: t.description,
      parameters: this.zodToJsonSchema(t.inputSchema),
    }));
  }

  /**
   * Minimal Zod-to-JSON-Schema converter for tool definitions.
   * In production, use the zod-to-json-schema package.
   *
   * Relies on Zod's internal `_def.typeName` tags. Anything that is not an
   * object schema (after unwrapping optional/default/nullable/effects
   * wrappers) becomes an empty object schema.
   */
  private zodToJsonSchema(schema: ZodSchema): Record<string, unknown> {
    const { def } = this.unwrapZodDef((schema as any)?._def);
    if (def?.typeName === 'ZodObject') {
      return this.zodObjectToJson(def);
    }
    return { type: 'object', properties: {} };
  }

  /** Converts a ZodObject definition into `{ type: 'object', properties, required }`. */
  private zodObjectToJson(def: any): Record<string, unknown> {
    const shape: Record<string, unknown> = def.shape();
    const properties: Record<string, unknown> = {};
    const required: string[] = [];

    for (const [key, value] of Object.entries(shape)) {
      const fieldDef = (value as any)?._def;
      const unwrapped = this.unwrapZodDef(fieldDef);
      properties[key] = {
        ...this.zodFieldToJson(fieldDef),
        // The description may sit on the wrapper or on the wrapped schema.
        description: fieldDef?.description || unwrapped.def?.description || '',
      };
      // Optional fields and fields with a default may be omitted by the model.
      if (!unwrapped.optional) required.push(key);
    }

    return { type: 'object', properties, required };
  }

  /** Converts one field definition, recursing into arrays and nested objects. */
  private zodFieldToJson(def: any): Record<string, unknown> {
    const { def: inner } = this.unwrapZodDef(def);
    switch (inner?.typeName) {
      case 'ZodArray':
        // JSON Schema consumers generally expect `items` on array types.
        return { type: 'array', items: this.zodFieldToJson(inner.type?._def) };
      case 'ZodObject':
        return this.zodObjectToJson(inner);
      case 'ZodEnum':
        return Array.isArray(inner.values)
          ? { type: 'string', enum: [...inner.values] }
          : { type: 'string' };
      default:
        return { type: this.zodTypeToJson(inner) };
    }
  }

  /**
   * Strips ZodOptional / ZodDefault / ZodNullable / ZodEffects wrappers.
   * `optional` is true when an optional or default wrapper was seen.
   * Nullability is not reflected in the emitted schema.
   */
  private unwrapZodDef(def: any): { def: any; optional: boolean } {
    let current = def;
    let optional = false;
    for (;;) {
      switch (current?.typeName) {
        case 'ZodOptional':
        case 'ZodDefault':
          optional = true;
          current = current.innerType?._def;
          continue;
        case 'ZodNullable':
          current = current.innerType?._def;
          continue;
        case 'ZodEffects':
          current = current.schema?._def;
          continue;
        default:
          return { def: current, optional };
      }
    }
  }

  /** Maps a Zod type tag to a JSON Schema type name; unknown tags fall back to `'string'`. */
  private zodTypeToJson(def: any): string {
    switch (this.unwrapZodDef(def).def?.typeName) {
      case 'ZodString': return 'string';
      case 'ZodNumber': return 'number';
      case 'ZodBoolean': return 'boolean';
      case 'ZodArray': return 'array';
      case 'ZodObject': return 'object';
      default: return 'string';
    }
  }
}
|
src/providers/anthropic/index.ts
ADDED
|
@@ -0,0 +1,159 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// ─── Anthropic Provider Adapter ──────────────────────────────────────────────
|
| 2 |
+
import type {
|
| 3 |
+
ProviderAdapter, ProviderConfig, ModelInfo, ModelCapability,
|
| 4 |
+
ModelInvocation, ModelResult, ModelStreamEvent, ToolCallRequest,
|
| 5 |
+
} from '../../core/provider/index.js';
|
| 6 |
+
import type { TokenUsage } from '../../core/events/index.js';
|
| 7 |
+
|
| 8 |
+
/**
 * Provider adapter for the Anthropic Messages API (`POST {baseUrl}/v1/messages`).
 *
 * Converts the harness's provider-neutral invocation into Anthropic's request
 * shape and maps blocking or SSE-streamed responses back into `ModelResult` /
 * `ModelStreamEvent` values.
 */
export class AnthropicProvider implements ProviderAdapter {
  id = 'anthropic';
  label = 'Anthropic';
  private apiKey: string;
  private baseUrl: string;

  /**
   * @param config Explicit `apiKey` / `baseUrl` take precedence. Otherwise the key
   *   falls back to the `ANTHROPIC_API_KEY` environment variable (empty string if
   *   unset) and the base URL to `https://api.anthropic.com`.
   */
  constructor(config: ProviderConfig) {
    this.apiKey = config.apiKey ?? process.env['ANTHROPIC_API_KEY'] ?? '';
    this.baseUrl = config.baseUrl ?? 'https://api.anthropic.com';
  }

  /** Returns a hard-coded model catalog; no network request is made. */
  async listModels(): Promise<ModelInfo[]> {
    return [
      { id: 'claude-sonnet-4-20250514', name: 'Claude Sonnet 4', provider: 'anthropic', contextWindow: 200000, maxOutputTokens: 64000, capabilities: ['streaming', 'tool-calling', 'vision', 'long-context', 'reasoning'], costPerMillionInput: 3, costPerMillionOutput: 15 },
      { id: 'claude-opus-4-20250514', name: 'Claude Opus 4', provider: 'anthropic', contextWindow: 200000, maxOutputTokens: 32000, capabilities: ['streaming', 'tool-calling', 'vision', 'long-context', 'reasoning'], costPerMillionInput: 15, costPerMillionOutput: 75 },
      { id: 'claude-3-5-haiku-20241022', name: 'Claude 3.5 Haiku', provider: 'anthropic', contextWindow: 200000, maxOutputTokens: 8192, capabilities: ['streaming', 'tool-calling', 'vision'], costPerMillionInput: 0.8, costPerMillionOutput: 4 },
    ];
  }

  /**
   * Reports whether this adapter handles a capability.
   * NOTE(review): individual models above list 'reasoning', but it is not
   * reported here — confirm whether that is intentional.
   */
  supports(capability: ModelCapability): boolean {
    return ['streaming', 'tool-calling', 'vision', 'long-context'].includes(capability);
  }

  /**
   * Performs a single blocking completion.
   * @throws Error when the HTTP response status is not OK.
   */
  async invoke(input: ModelInvocation): Promise<ModelResult> {
    const res = await this.post(this.buildBody(input), input.signal);
    const data = (await res.json()) as any;
    return this.parseResponse(data);
  }

  /**
   * Streams a completion over SSE, yielding text deltas, tool-call
   * start/delta/end events and a `finish` event when `message_delta` arrives.
   *
   * Input tokens are reported by `message_start`, while `message_delta` carries
   * the output token count, so both events feed the final usage.
   *
   * @throws Error when the HTTP status is not OK or the response has no body.
   */
  async *stream(input: ModelInvocation): AsyncIterable<ModelStreamEvent> {
    const res = await this.post({ ...this.buildBody(input), stream: true }, input.signal);
    if (!res.body) throw new Error('Anthropic API error: streaming response has no body');

    const reader = res.body.getReader();
    const decoder = new TextDecoder();
    let buffer = '';
    let fullText = '';
    const toolCalls: Array<{ id: string; name: string; args: string }> = [];
    let currentToolIdx = -1;
    let promptTokens = 0;
    let completionTokens = 0;

    let streamDone = false;
    while (!streamDone) {
      const { done, value } = await reader.read();
      streamDone = done;
      buffer += done ? decoder.decode() : decoder.decode(value, { stream: true });
      const lines = buffer.split('\n');
      // Keep the trailing partial line for the next read; flush everything at end of stream.
      buffer = done ? '' : (lines.pop() ?? '');

      for (const rawLine of lines) {
        const line = rawLine.trimEnd(); // tolerate CRLF line endings
        if (!line.startsWith('data: ')) continue;
        const event = JSON.parse(line.slice(6)) as any;

        if (event.type === 'message_start') {
          promptTokens = event.message?.usage?.input_tokens ?? promptTokens;
          completionTokens = event.message?.usage?.output_tokens ?? completionTokens;
        } else if (event.type === 'content_block_start' && event.content_block?.type === 'tool_use') {
          const tc = { id: event.content_block.id, name: event.content_block.name, args: '' };
          toolCalls.push(tc);
          currentToolIdx = toolCalls.length - 1;
          yield { type: 'tool-call-start', id: tc.id, name: tc.name };
        } else if (event.type === 'content_block_delta') {
          if (event.delta?.type === 'text_delta') {
            fullText += event.delta.text;
            yield { type: 'text-delta', text: event.delta.text };
          } else if (event.delta?.type === 'input_json_delta' && currentToolIdx >= 0) {
            const current = toolCalls[currentToolIdx];
            if (current) {
              current.args += event.delta.partial_json;
              yield { type: 'tool-call-delta', id: current.id, argumentsDelta: event.delta.partial_json };
            }
          }
        } else if (event.type === 'content_block_stop' && currentToolIdx >= 0) {
          const current = toolCalls[currentToolIdx];
          if (current) yield { type: 'tool-call-end', id: current.id };
          currentToolIdx = -1;
        } else if (event.type === 'message_delta') {
          promptTokens = event.usage?.input_tokens ?? promptTokens;
          completionTokens = event.usage?.output_tokens ?? completionTokens;
          const usage: TokenUsage = {
            promptTokens,
            completionTokens,
            totalTokens: promptTokens + completionTokens,
          };
          const tcReqs: ToolCallRequest[] = toolCalls.map((t) => ({ id: t.id, name: t.name, arguments: t.args }));
          // Same mapping as parseResponse: tool calls win, then truncation, else a normal stop.
          const finishReason = tcReqs.length
            ? 'tool_calls'
            : event.delta?.stop_reason === 'max_tokens'
              ? 'length'
              : 'stop';
          yield {
            type: 'finish',
            result: { content: fullText, toolCalls: tcReqs.length ? tcReqs : undefined, usage, finishReason },
          };
        }
      }
    }
  }

  /** POSTs a JSON body to the Messages endpoint and rejects non-OK responses. */
  private async post(body: Record<string, unknown>, signal: ModelInvocation['signal']): Promise<Response> {
    const res = await fetch(`${this.baseUrl}/v1/messages`, {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
        'x-api-key': this.apiKey,
        'anthropic-version': '2023-06-01',
      },
      body: JSON.stringify(body),
      signal,
    });
    if (!res.ok) throw new Error(`Anthropic API error: ${res.status} ${await res.text()}`);
    return res;
  }

  /**
   * Builds the Messages API request body.
   *
   * - System messages are lifted into the top-level `system` field (multiple
   *   system messages are joined with a blank line instead of being dropped).
   * - `tool` messages become `user` messages carrying a `tool_result` block.
   * - Assistant messages with tool calls become `text` + `tool_use` blocks.
   * - `max_tokens` defaults to 8192 when the caller gives none.
   */
  private buildBody(input: ModelInvocation): Record<string, unknown> {
    const systemText = input.messages
      .filter((m) => m.role === 'system')
      .map((m) => m.content)
      .join('\n\n');
    const nonSystemMsgs = input.messages.filter((m) => m.role !== 'system');

    const messages = nonSystemMsgs.map((m) => {
      if (m.role === 'tool') {
        return { role: 'user', content: [{ type: 'tool_result', tool_use_id: m.toolCallId, content: m.content }] };
      }
      if (m.role === 'assistant' && m.toolCalls?.length) {
        const content: any[] = [];
        if (m.content) content.push({ type: 'text', text: m.content });
        for (const tc of m.toolCalls) {
          // A streamed tool call without arguments accumulates '' — treat it as an empty object.
          const toolInput = tc.arguments.trim() === '' ? {} : JSON.parse(tc.arguments);
          content.push({ type: 'tool_use', id: tc.id, name: tc.name, input: toolInput });
        }
        return { role: 'assistant', content };
      }
      return { role: m.role === 'user' ? 'user' : 'assistant', content: m.content };
    });

    const body: Record<string, unknown> = { model: input.model, messages, max_tokens: input.maxTokens ?? 8192 };
    if (systemText) body.system = systemText;
    if (input.tools?.length) {
      body.tools = input.tools.map((t) => ({ name: t.name, description: t.description, input_schema: t.parameters }));
    }
    if (input.temperature !== undefined) body.temperature = input.temperature;
    return body;
  }

  /**
   * Converts a non-streaming Messages API response: concatenates `text` blocks,
   * collects `tool_use` blocks (input re-serialized as JSON) and sums usage.
   */
  private parseResponse(data: any): ModelResult {
    let content = '';
    const toolCalls: ToolCallRequest[] = [];
    for (const block of data.content ?? []) {
      if (block.type === 'text') content += block.text;
      if (block.type === 'tool_use') toolCalls.push({ id: block.id, name: block.name, arguments: JSON.stringify(block.input) });
    }
    const promptTokens: number = data.usage?.input_tokens ?? 0;
    const completionTokens: number = data.usage?.output_tokens ?? 0;
    const usage: TokenUsage = { promptTokens, completionTokens, totalTokens: promptTokens + completionTokens };
    return {
      content,
      toolCalls: toolCalls.length ? toolCalls : undefined,
      usage,
      finishReason: toolCalls.length ? 'tool_calls' : data.stop_reason === 'max_tokens' ? 'length' : 'stop',
    };
  }
}
|
src/providers/gemini/index.ts
ADDED
|
@@ -0,0 +1,130 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// ─── Gemini Provider Adapter ────────────────────────────────────────────────
|
| 2 |
+
import type {
|
| 3 |
+
ProviderAdapter, ProviderConfig, ModelInfo, ModelCapability,
|
| 4 |
+
ModelInvocation, ModelResult, ModelStreamEvent, ToolCallRequest,
|
| 5 |
+
} from '../../core/provider/index.js';
|
| 6 |
+
import type { TokenUsage } from '../../core/events/index.js';
|
| 7 |
+
|
| 8 |
+
/**
 * Provider adapter for the Google Gemini `generateContent` /
 * `streamGenerateContent` REST endpoints.
 *
 * Converts the harness's provider-neutral invocation into Gemini's
 * `contents` / `tools` / `generationConfig` shape and maps responses back into
 * `ModelResult` / `ModelStreamEvent` values.
 */
export class GeminiProvider implements ProviderAdapter {
  id = 'gemini';
  label = 'Google Gemini';
  private apiKey: string;
  private baseUrl: string;
  /** Monotonic counter so tool-call ids minted in the same millisecond stay unique. */
  private toolCallSeq = 0;

  /**
   * @param config Explicit `apiKey` / `baseUrl` take precedence. Otherwise the key
   *   falls back to the `GEMINI_API_KEY` environment variable (empty string if
   *   unset) and the base URL to the public `v1beta` endpoint.
   */
  constructor(config: ProviderConfig) {
    this.apiKey = config.apiKey ?? process.env['GEMINI_API_KEY'] ?? '';
    this.baseUrl = config.baseUrl ?? 'https://generativelanguage.googleapis.com/v1beta';
  }

  /** Returns a hard-coded model catalog; no network request is made. */
  async listModels(): Promise<ModelInfo[]> {
    return [
      { id: 'gemini-2.5-pro', name: 'Gemini 2.5 Pro', provider: 'gemini', contextWindow: 1000000, maxOutputTokens: 65536, capabilities: ['streaming', 'tool-calling', 'vision', 'long-context', 'reasoning'], costPerMillionInput: 1.25, costPerMillionOutput: 10 },
      { id: 'gemini-2.5-flash', name: 'Gemini 2.5 Flash', provider: 'gemini', contextWindow: 1000000, maxOutputTokens: 65536, capabilities: ['streaming', 'tool-calling', 'vision', 'long-context'], costPerMillionInput: 0.15, costPerMillionOutput: 0.6 },
    ];
  }

  /** Reports whether this adapter handles a capability. */
  supports(capability: ModelCapability): boolean {
    return ['streaming', 'tool-calling', 'vision', 'long-context'].includes(capability);
  }

  /**
   * Performs a single blocking completion.
   * @throws Error when the HTTP response status is not OK.
   */
  async invoke(input: ModelInvocation): Promise<ModelResult> {
    const url = `${this.baseUrl}/models/${input.model}:generateContent`;
    const res = await this.post(url, this.buildBody(input), input.signal);
    const data = (await res.json()) as any;
    return this.parseResponse(data);
  }

  /**
   * Streams a completion over SSE. Text parts are yielded as `text-delta`;
   * each function call is yielded as a start/delta/end triple because Gemini
   * delivers its arguments in one piece. The final `finish` event carries the
   * accumulated text, the collected tool calls, the last usage metadata seen,
   * and the mapped finish reason.
   *
   * @throws Error when the HTTP status is not OK or the response has no body.
   */
  async *stream(input: ModelInvocation): AsyncIterable<ModelStreamEvent> {
    const url = `${this.baseUrl}/models/${input.model}:streamGenerateContent?alt=sse`;
    const res = await this.post(url, this.buildBody(input), input.signal);
    if (!res.body) throw new Error('Gemini API error: streaming response has no body');

    const reader = res.body.getReader();
    const decoder = new TextDecoder();
    let buffer = '';
    let fullText = '';
    const toolCalls: ToolCallRequest[] = [];
    let usage: TokenUsage = { promptTokens: 0, completionTokens: 0, totalTokens: 0 };
    let rawFinishReason: string | undefined;

    let streamDone = false;
    while (!streamDone) {
      const { done, value } = await reader.read();
      streamDone = done;
      buffer += done ? decoder.decode() : decoder.decode(value, { stream: true });
      const lines = buffer.split('\n');
      // Keep the trailing partial line for the next read; flush everything at end of stream.
      buffer = done ? '' : (lines.pop() ?? '');

      for (const rawLine of lines) {
        const line = rawLine.trimEnd(); // tolerate CRLF line endings
        if (!line.startsWith('data: ')) continue;
        const chunk = JSON.parse(line.slice(6)) as any;

        if (chunk.usageMetadata) usage = this.toUsage(chunk.usageMetadata);
        rawFinishReason = chunk.candidates?.[0]?.finishReason ?? rawFinishReason;

        for (const part of chunk.candidates?.[0]?.content?.parts ?? []) {
          if (part.text) {
            fullText += part.text;
            yield { type: 'text-delta', text: part.text };
          }
          if (part.functionCall) {
            const id = this.nextToolCallId();
            const args = JSON.stringify(part.functionCall.args ?? {});
            toolCalls.push({ id, name: part.functionCall.name, arguments: args });
            yield { type: 'tool-call-start', id, name: part.functionCall.name };
            yield { type: 'tool-call-delta', id, argumentsDelta: args };
            yield { type: 'tool-call-end', id };
          }
        }
      }
    }

    yield {
      type: 'finish',
      result: {
        content: fullText,
        toolCalls: toolCalls.length ? toolCalls : undefined,
        usage,
        finishReason: toolCalls.length ? 'tool_calls' : rawFinishReason === 'MAX_TOKENS' ? 'length' : 'stop',
      },
    };
  }

  /**
   * POSTs a JSON body and rejects non-OK responses. The API key is sent in the
   * `x-goog-api-key` header rather than the query string so it does not end up
   * in URLs that get logged.
   */
  private async post(url: string, body: Record<string, unknown>, signal: ModelInvocation['signal']): Promise<Response> {
    const res = await fetch(url, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json', 'x-goog-api-key': this.apiKey },
      body: JSON.stringify(body),
      signal,
    });
    if (!res.ok) throw new Error(`Gemini API error: ${res.status} ${await res.text()}`);
    return res;
  }

  /** Mints a tool-call id; Gemini does not supply one, and ids must differ per call. */
  private nextToolCallId(): string {
    this.toolCallSeq += 1;
    return `gemini-tc-${Date.now()}-${this.toolCallSeq}`;
  }

  /** Maps Gemini `usageMetadata` onto the harness's `TokenUsage`. */
  private toUsage(meta: any): TokenUsage {
    return {
      promptTokens: meta?.promptTokenCount ?? 0,
      completionTokens: meta?.candidatesTokenCount ?? 0,
      totalTokens: meta?.totalTokenCount ?? 0,
    };
  }

  /**
   * Builds the request body.
   *
   * - System messages go into `systemInstruction` (joined with a blank line).
   * - Assistant messages with tool calls become `functionCall` parts, so a
   *   following `functionResponse` has a matching call turn.
   * - `tool` messages become `functionResponse` parts named after the call
   *   they answer (`m.name`, else the name recorded for `m.toolCallId`, else
   *   `'tool'`).
   * - `generationConfig` carries `maxOutputTokens` whenever `maxTokens` is
   *   given, and keeps the previous 8192 default when only `temperature` is set.
   */
  private buildBody(input: ModelInvocation): Record<string, unknown> {
    const contents: any[] = [];
    const systemTexts: string[] = [];
    const toolNamesById = new Map<string, string>();

    for (const m of input.messages) {
      if (m.role === 'system') {
        systemTexts.push(m.content);
        continue;
      }
      if (m.role === 'tool') {
        const name = m.name ?? toolNamesById.get(m.toolCallId ?? '') ?? 'tool';
        contents.push({ role: 'function', parts: [{ functionResponse: { name, response: { result: m.content } } }] });
        continue;
      }
      if (m.role === 'assistant' && m.toolCalls?.length) {
        const parts: any[] = [];
        if (m.content) parts.push({ text: m.content });
        for (const tc of m.toolCalls) {
          toolNamesById.set(tc.id, tc.name);
          // A tool call without arguments may be stored as '' — treat it as an empty object.
          const args = tc.arguments.trim() === '' ? {} : JSON.parse(tc.arguments);
          parts.push({ functionCall: { name: tc.name, args } });
        }
        contents.push({ role: 'model', parts });
        continue;
      }
      const role = m.role === 'assistant' ? 'model' : 'user';
      contents.push({ role, parts: [{ text: m.content }] });
    }

    const body: Record<string, unknown> = { contents };
    const systemInstruction = systemTexts.join('\n\n');
    if (systemInstruction) body.systemInstruction = { parts: [{ text: systemInstruction }] };
    if (input.tools?.length) {
      body.tools = [{ functionDeclarations: input.tools.map((t) => ({ name: t.name, description: t.description, parameters: t.parameters })) }];
    }

    const generationConfig: Record<string, unknown> = {};
    if (input.temperature !== undefined) {
      generationConfig.temperature = input.temperature;
      generationConfig.maxOutputTokens = input.maxTokens ?? 8192;
    } else if (input.maxTokens !== undefined) {
      generationConfig.maxOutputTokens = input.maxTokens;
    }
    if (Object.keys(generationConfig).length > 0) body.generationConfig = generationConfig;
    return body;
  }

  /**
   * Converts a non-streaming response: concatenates text parts of the first
   * candidate, collects its function calls (each with a unique generated id),
   * and maps `MAX_TOKENS` to the `'length'` finish reason.
   */
  private parseResponse(data: any): ModelResult {
    let content = '';
    const toolCalls: ToolCallRequest[] = [];
    const candidate = data.candidates?.[0];
    for (const part of candidate?.content?.parts ?? []) {
      if (part.text) content += part.text;
      if (part.functionCall) {
        toolCalls.push({ id: this.nextToolCallId(), name: part.functionCall.name, arguments: JSON.stringify(part.functionCall.args ?? {}) });
      }
    }
    return {
      content,
      toolCalls: toolCalls.length ? toolCalls : undefined,
      usage: this.toUsage(data.usageMetadata ?? {}),
      finishReason: toolCalls.length ? 'tool_calls' : candidate?.finishReason === 'MAX_TOKENS' ? 'length' : 'stop',
    };
  }
}
|
src/providers/index.ts
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// ─── Providers barrel export ─────────────────────────────────────────────────
|
| 2 |
+
export { OpenAIProvider } from './openai/index.js';
|
| 3 |
+
export { AnthropicProvider } from './anthropic/index.js';
|
| 4 |
+
export { GeminiProvider } from './gemini/index.js';
|
| 5 |
+
export { OpenAICompatibleProvider, OpenRouterProvider } from './openrouter/index.js';
|
src/providers/openai/index.ts
ADDED
|
@@ -0,0 +1,148 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// ─── OpenAI Provider Adapter ────────────────────────────────────────────────
|
| 2 |
+
import type {
|
| 3 |
+
ProviderAdapter, ProviderConfig, ModelInfo, ModelCapability,
|
| 4 |
+
ModelInvocation, ModelResult, ModelStreamEvent, ToolCallRequest,
|
| 5 |
+
} from '../../core/provider/index.js';
|
| 6 |
+
import type { TokenUsage } from '../../core/events/index.js';
|
| 7 |
+
|
| 8 |
+
/**
 * Provider adapter for the OpenAI Chat Completions API
 * (`POST {baseUrl}/chat/completions`).
 *
 * Converts the harness's provider-neutral invocation into the Chat Completions
 * request shape and maps blocking or SSE-streamed responses back into
 * `ModelResult` / `ModelStreamEvent` values.
 */
export class OpenAIProvider implements ProviderAdapter {
  id = 'openai';
  label = 'OpenAI';
  private apiKey: string;
  private baseUrl: string;

  /**
   * @param config Explicit `apiKey` / `baseUrl` take precedence. Otherwise the key
   *   falls back to the `OPENAI_API_KEY` environment variable (empty string if
   *   unset) and the base URL to `https://api.openai.com/v1`.
   */
  constructor(config: ProviderConfig) {
    this.apiKey = config.apiKey ?? process.env['OPENAI_API_KEY'] ?? '';
    this.baseUrl = config.baseUrl ?? 'https://api.openai.com/v1';
  }

  /** Returns a hard-coded model catalog; no network request is made. */
  async listModels(): Promise<ModelInfo[]> {
    return [
      { id: 'gpt-4o', name: 'GPT-4o', provider: 'openai', contextWindow: 128000, maxOutputTokens: 16384, capabilities: ['streaming', 'tool-calling', 'vision', 'json-mode', 'structured-output'], costPerMillionInput: 2.5, costPerMillionOutput: 10 },
      { id: 'gpt-4o-mini', name: 'GPT-4o Mini', provider: 'openai', contextWindow: 128000, maxOutputTokens: 16384, capabilities: ['streaming', 'tool-calling', 'json-mode', 'structured-output'], costPerMillionInput: 0.15, costPerMillionOutput: 0.6 },
      { id: 'o1', name: 'o1', provider: 'openai', contextWindow: 200000, maxOutputTokens: 100000, capabilities: ['streaming', 'tool-calling', 'reasoning'], costPerMillionInput: 15, costPerMillionOutput: 60 },
    ];
  }

  /** Reports whether this adapter handles a capability. */
  supports(capability: ModelCapability): boolean {
    return ['streaming', 'tool-calling', 'vision', 'json-mode', 'structured-output'].includes(capability);
  }

  /**
   * Performs a single blocking completion.
   * @throws Error when the HTTP status is not OK or the response has no choices.
   */
  async invoke(input: ModelInvocation): Promise<ModelResult> {
    const res = await this.post(this.buildBody(input, false), input.signal);
    const data = (await res.json()) as any;
    return this.parseResponse(data);
  }

  /**
   * Streams a completion over SSE, yielding text deltas and tool-call
   * start/delta/end events, then a single `finish` event.
   *
   * Because the request sets `stream_options.include_usage`, token usage
   * arrives in a trailing chunk whose `choices` array is empty — after the
   * chunk that carries `finish_reason`. The `finish` event is therefore
   * emitted once the stream has ended, so it can include that usage. No
   * `finish` is emitted if the stream ends without any `finish_reason`.
   *
   * @throws Error when the HTTP status is not OK or the response has no body.
   */
  async *stream(input: ModelInvocation): AsyncIterable<ModelStreamEvent> {
    const res = await this.post(this.buildBody(input, true), input.signal);
    if (!res.body) throw new Error('OpenAI API error: streaming response has no body');

    const reader = res.body.getReader();
    const decoder = new TextDecoder();
    let buffer = '';
    let fullText = '';
    const toolCalls = new Map<number, { id: string; name: string; args: string }>();
    let usage: TokenUsage = { promptTokens: 0, completionTokens: 0, totalTokens: 0 };
    let rawFinishReason: string | undefined;

    let streamDone = false;
    while (!streamDone) {
      const { done, value } = await reader.read();
      streamDone = done;
      buffer += done ? decoder.decode() : decoder.decode(value, { stream: true });
      const lines = buffer.split('\n');
      // Keep the trailing partial line for the next read; flush everything at end of stream.
      buffer = done ? '' : (lines.pop() ?? '');

      for (const rawLine of lines) {
        const line = rawLine.trimEnd(); // tolerate CRLF line endings
        if (!line.startsWith('data: ') || line === 'data: [DONE]') continue;
        const chunk = JSON.parse(line.slice(6)) as any;

        // Usage may sit on a chunk without choices, so read it before looking at deltas.
        if (chunk.usage) {
          usage = {
            promptTokens: chunk.usage.prompt_tokens ?? 0,
            completionTokens: chunk.usage.completion_tokens ?? 0,
            totalTokens: chunk.usage.total_tokens ?? 0,
          };
        }

        const choice = chunk.choices?.[0];
        const delta = choice?.delta;

        if (delta?.content) {
          fullText += delta.content;
          yield { type: 'text-delta', text: delta.content };
        }

        for (const tc of delta?.tool_calls ?? []) {
          const idx = tc.index as number;
          // Start a call only when the id is new for this index, so endpoints that
          // repeat the id on every delta do not reset the accumulated arguments.
          if (tc.id && toolCalls.get(idx)?.id !== tc.id) {
            toolCalls.set(idx, { id: tc.id, name: tc.function?.name ?? '', args: '' });
            yield { type: 'tool-call-start', id: tc.id, name: tc.function?.name ?? '' };
          }
          if (tc.function?.arguments) {
            const existing = toolCalls.get(idx);
            if (!existing) continue; // argument fragment for a call that was never started
            existing.args += tc.function.arguments;
            yield { type: 'tool-call-delta', id: existing.id, argumentsDelta: tc.function.arguments };
          }
        }

        if (choice?.finish_reason && rawFinishReason === undefined) {
          rawFinishReason = choice.finish_reason;
          for (const [, tc] of toolCalls) {
            yield { type: 'tool-call-end', id: tc.id };
          }
        }
      }
    }

    if (rawFinishReason === undefined) return;

    const tcArray: ToolCallRequest[] = [...toolCalls.values()].map((t) => ({
      id: t.id, name: t.name, arguments: t.args,
    }));
    yield {
      type: 'finish',
      result: {
        content: fullText,
        toolCalls: tcArray.length ? tcArray : undefined,
        usage,
        finishReason: tcArray.length ? 'tool_calls' : rawFinishReason === 'length' ? 'length' : 'stop',
      },
    };
  }

  /** POSTs a JSON body to the chat-completions endpoint and rejects non-OK responses. */
  private async post(body: Record<string, unknown>, signal: ModelInvocation['signal']): Promise<Response> {
    const res = await fetch(`${this.baseUrl}/chat/completions`, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json', 'Authorization': `Bearer ${this.apiKey}` },
      body: JSON.stringify(body),
      signal,
    });
    if (!res.ok) throw new Error(`OpenAI API error: ${res.status} ${await res.text()}`);
    return res;
  }

  /**
   * Builds the Chat Completions request body.
   *
   * `tool_calls` is only attached to an assistant message when the list is
   * non-empty. Optional settings (`tools`, `temperature`, `max_tokens`,
   * `response_format`) are added only when provided; streaming requests also
   * ask for usage via `stream_options`.
   *
   * @param stream Whether to request an SSE stream.
   */
  private buildBody(input: ModelInvocation, stream: boolean): Record<string, unknown> {
    const messages = input.messages.map((m) => {
      if (m.role === 'tool') return { role: 'tool', content: m.content, tool_call_id: m.toolCallId };
      if (m.toolCalls?.length) {
        return {
          role: 'assistant',
          content: m.content || null,
          tool_calls: m.toolCalls.map((tc) => ({ id: tc.id, type: 'function', function: { name: tc.name, arguments: tc.arguments } })),
        };
      }
      return { role: m.role, content: m.content };
    });
    const body: Record<string, unknown> = { model: input.model, messages, stream };
    if (input.tools?.length) {
      body.tools = input.tools.map((t) => ({ type: 'function', function: { name: t.name, description: t.description, parameters: t.parameters } }));
    }
    if (input.temperature !== undefined) body.temperature = input.temperature;
    if (input.maxTokens) body.max_tokens = input.maxTokens;
    if (input.jsonMode) body.response_format = { type: 'json_object' };
    if (stream) body.stream_options = { include_usage: true };
    return body;
  }

  /**
   * Converts a non-streaming response using its first choice.
   * @throws Error when the payload contains no choice with a message.
   */
  private parseResponse(data: any): ModelResult {
    const choice = data?.choices?.[0];
    if (!choice?.message) throw new Error('OpenAI API error: response contained no choices');

    const parsedCalls: ToolCallRequest[] = (choice.message.tool_calls ?? []).map((tc: any) => ({
      id: tc.id, name: tc.function.name, arguments: tc.function.arguments,
    }));
    const toolCalls = parsedCalls.length ? parsedCalls : undefined;
    const usage: TokenUsage = {
      promptTokens: data.usage?.prompt_tokens ?? 0,
      completionTokens: data.usage?.completion_tokens ?? 0,
      totalTokens: data.usage?.total_tokens ?? 0,
    };
    return {
      content: choice.message.content ?? '',
      toolCalls,
      usage,
      finishReason: toolCalls ? 'tool_calls' : choice.finish_reason === 'length' ? 'length' : 'stop',
    };
  }
}
|
src/providers/openrouter/index.ts
ADDED
|
@@ -0,0 +1,48 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// ─── OpenAI-Compatible Provider (OpenRouter, local, etc.) ───────────────────
|
| 2 |
+
import { OpenAIProvider } from '../openai/index.js';
|
| 3 |
+
import type { ProviderConfig, ModelInfo } from '../../core/provider/index.js';
|
| 4 |
+
|
| 5 |
+
/**
 * Adapter for any endpoint that speaks the OpenAI chat-completions protocol
 * (OpenRouter, local servers, ...). Request handling is inherited from
 * `OpenAIProvider`; only the identity and model discovery differ.
 */
export class OpenAICompatibleProvider extends OpenAIProvider {
  override id: string;
  override label: string;

  /**
   * @param config Provider config; `label` defaults to `config.id` when omitted.
   */
  constructor(config: ProviderConfig & { label?: string }) {
    super(config);
    const { id, label } = config;
    this.id = id;
    this.label = label ?? id;
  }

  /**
   * Queries `GET {baseUrl}/models` and maps each entry to a `ModelInfo`.
   * This is best-effort: a non-OK response or any thrown error yields an
   * empty list rather than an exception.
   */
  override async listModels(): Promise<ModelInfo[]> {
    try {
      // The base class keeps these fields private, hence the cast.
      const { baseUrl, apiKey } = this as any;
      const response = await fetch(`${baseUrl}/models`, {
        headers: { 'Authorization': `Bearer ${apiKey}` },
      });
      if (!response.ok) {
        return [];
      }

      const payload = await response.json() as any;
      const entries = payload.data ?? [];
      return entries.map((entry: any) => {
        const capabilities = ['streaming', 'tool-calling'] as any[];
        return {
          id: entry.id,
          name: entry.id,
          provider: this.id,
          contextWindow: entry.context_length ?? 128000,
          capabilities,
        };
      });
    } catch {
      return [];
    }
  }
}
|
| 36 |
+
|
| 37 |
+
// ─── OpenRouter convenience subclass ────────────────────────────────────────
|
| 38 |
+
/**
 * Preconfigured `OpenAICompatibleProvider` for OpenRouter: the id is fixed to
 * `'openrouter'` and the label to `'OpenRouter'`.
 */
export class OpenRouterProvider extends OpenAICompatibleProvider {
  /**
   * @param config Provider config without `id`. `baseUrl` defaults to
   *   `https://openrouter.ai/api/v1`; `apiKey` falls back to the
   *   `OPENROUTER_API_KEY` environment variable, then to an empty string.
   */
  constructor(config: Omit<ProviderConfig, 'id'>) {
    super(
      Object.assign({}, config, {
        id: 'openrouter',
        baseUrl: config.baseUrl ?? 'https://openrouter.ai/api/v1',
        apiKey: config.apiKey ?? process.env['OPENROUTER_API_KEY'] ?? '',
        label: 'OpenRouter',
      }) as any,
    );
  }
}
|
src/skills/coding/index.ts
ADDED
|
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// ─── Coding Skill ───────────────────────────────────────────────────────────
|
| 2 |
+
import type { SkillModule } from '../../core/skills/index.js';
|
| 3 |
+
|
| 4 |
+
/**
 * Skill definition for software-engineering tasks: metadata, the tools it
 * suggests, and the instruction text (assembled line by line below).
 */
export const codingSkill: SkillModule = {
  id: 'coding',
  title: 'Software Engineering',
  description: 'Write, refactor, test, and debug code across languages and frameworks.',
  suggestedTools: ['read_file', 'write_file', 'list_directory', 'shell_exec'],
  tags: ['code', 'dev', 'engineering'],
  instructions: [
    'You are an expert software engineer. Follow these rules:',
    '',
    '## Planning',
    '- Break complex tasks into subtasks. Plan before coding.',
    '- State assumptions explicitly before implementing.',
    '',
    '## Code Quality',
    '- Write clean, typed, well-documented code.',
    '- Follow existing project conventions (formatting, naming, structure).',
    '- Prefer small, focused functions over large monoliths.',
    '- Add error handling for all I/O operations.',
    '',
    '## Verification',
    '- After writing code, run the test suite or relevant checks.',
    '- If tests fail, read the error, diagnose the root cause, and fix it.',
    '- Do not declare success without verification.',
    '',
    '## File Operations',
    '- Read files before modifying them to understand context.',
    '- Make minimal targeted edits rather than rewriting entire files.',
    '- Create new files when the change is substantial.',
    '',
    '## Communication',
    '- Explain your reasoning concisely.',
    '- Show relevant code snippets in your response.',
    '- Report test results and any remaining issues.',
  ].join('\n'),
};
|
src/skills/docs/index.ts
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// ─── Docs Skill ─────────────────────────────────────────────────────────────
|
| 2 |
+
import type { SkillModule } from '../../core/skills/index.js';
|
| 3 |
+
|
| 4 |
+
/**
 * Skill definition for technical-writing tasks: metadata, the tools it
 * suggests, and the instruction text (assembled line by line below).
 */
export const docsSkill: SkillModule = {
  id: 'docs',
  title: 'Documentation',
  description: 'Write clear, structured technical documentation, READMEs, guides, and API references.',
  suggestedTools: ['read_file', 'write_file', 'list_directory'],
  tags: ['docs', 'writing', 'technical-writing'],
  instructions: [
    'You are an expert technical writer. Follow these rules:',
    '',
    '## Structure',
    '- Use clear hierarchical headings (h1 for title, h2 for sections, h3 for subsections).',
    '- Start with a brief overview/summary before diving into details.',
    '- Include a table of contents for documents longer than 3 sections.',
    '',
    '## Clarity',
    '- Write for the target audience (developers, users, or operators).',
    '- Define terms on first use.',
    '- Use active voice and present tense.',
    '- Keep sentences short and paragraphs focused.',
    '',
    '## Code Examples',
    '- Include working code examples for every API or feature.',
    '- Show both minimal and realistic usage patterns.',
    '- Annotate non-obvious lines with comments.',
    '',
    '## Completeness',
    '- Cover: what it is, why to use it, how to install, how to use, configuration, troubleshooting.',
    '- Include prerequisites and environment requirements.',
    '- Document error conditions and edge cases.',
    '',
    '## Format',
    '- Use Markdown with consistent formatting.',
    '- Use tables for structured comparisons.',
    '- Use admonitions (> **Note:**, > **Warning:**) for important callouts.',
  ].join('\n'),
};
|
src/skills/index.ts
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// ─── Skills barrel export ────────────────────────────────────────────────────
|
| 2 |
+
export { codingSkill } from './coding/index.js';
|
| 3 |
+
export { researchSkill } from './research/index.js';
|
| 4 |
+
export { docsSkill } from './docs/index.js';
|
src/skills/research/index.ts
ADDED
|
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// ─── Research Skill ──────────────────────────────────────────────────────────
|
| 2 |
+
import type { SkillModule } from '../../core/skills/index.js';
|
| 3 |
+
|
| 4 |
+
/**
 * Prompt text for the research skill, kept as one entry per output line so
 * individual rules are easy to diff. Empty entries are the blank lines that
 * separate the Markdown sections.
 */
const researchInstructionLines = [
  'You are a thorough researcher. Follow these rules:',
  '',
  '## Methodology',
  '- Start with the primary source (official docs, original paper, authoritative API).',
  '- Cross-reference multiple sources for claims that matter.',
  '- Distinguish facts from opinions and speculation.',
  '- Note when information may be outdated.',
  '',
  '## Output',
  '- Structure findings with clear headings and bullet points.',
  '- Cite sources with URLs when available.',
  '- Highlight key findings, contradictions, and gaps.',
  '- Provide a summary with confidence level for each major claim.',
  '',
  '## Web Research',
  '- Fetch documentation pages and extract relevant sections.',
  "- Do not hallucinate URLs or content you haven't fetched.",
  '- If a page is unavailable, note it and try alternatives.',
  '',
  '## Depth',
  '- For technical questions, go to the source code or spec.',
  '- For market/product questions, find multiple data points.',
  '- Always answer the actual question, not adjacent ones.',
];

/**
 * Skill definition for research and analysis tasks.
 *
 * Suggests the `web_fetch` and `read_file` tools and supplies the instruction
 * text assembled from `researchInstructionLines`.
 */
export const researchSkill: SkillModule = {
  id: 'research',
  title: 'Research & Analysis',
  description: 'Gather information from web sources, documentation, and APIs to answer questions or inform decisions.',
  suggestedTools: ['web_fetch', 'read_file'],
  tags: ['research', 'analysis', 'information'],
  instructions: researchInstructionLines.join('\n'),
};
|
src/tools/fs/index.ts
ADDED
|
@@ -0,0 +1,73 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// ─── Filesystem Tool ────────────────────────────────────────────────────────
|
| 2 |
+
import { z } from 'zod';
|
| 3 |
+
import { readFile, writeFile, readdir, stat, mkdir } from 'fs/promises';
|
| 4 |
+
import { dirname, join, resolve } from 'path';
|
| 5 |
+
import type { ToolDef } from '../../core/tools/index.js';
|
| 6 |
+
|
| 7 |
+
/**
 * Tool that reads a file as UTF-8 text and reports its size in bytes.
 *
 * `input.path` is resolved against `ctx.workDir`; an absolute path is used
 * as-is because `path.resolve` lets an absolute segment override the base.
 */
export const readFileTool: ToolDef<{ path: string }, { content: string; size: number }> = {
  id: 'fs.read',
  name: 'read_file',
  description: 'Read the contents of a file at the given path. Returns the text content and file size in bytes.',
  inputSchema: z.object({ path: z.string().describe('Absolute or relative file path to read') }),
  outputSchema: z.object({ content: z.string(), size: z.number() }),
  permission: 'read',
  sideEffect: 'none',
  timeout: 10000,
  retries: 0,
  tags: ['filesystem'],
  renderer: { icon: '📄', color: 'blue' },
  /**
   * Reads the target file and returns its decoded text plus its byte length.
   *
   * @param input - `path` of the file to read.
   * @param ctx - Execution context; only `workDir` is used here.
   * @returns The UTF-8 decoded content and the number of bytes read.
   * @throws Rejects with the underlying `fs` error when the file cannot be read.
   */
  async execute(input, ctx) {
    const filePath = resolve(ctx.workDir, input.path);
    // Read once as raw bytes and derive both outputs from the same buffer.
    // A separate stat() after the read could observe a different file state
    // (so `size` would not describe `content`) and costs an extra syscall.
    const raw = await readFile(filePath);
    return { content: raw.toString('utf-8'), size: raw.byteLength };
  },
};
|
| 26 |
+
|
| 27 |
+
/**
 * Tool that writes UTF-8 text to a file, creating missing parent directories
 * and overwriting any existing content.
 *
 * `input.path` is resolved against `ctx.workDir`.
 */
export const writeFileTool: ToolDef<{ path: string; content: string }, { written: boolean; path: string }> = {
  id: 'fs.write',
  name: 'write_file',
  description: 'Write content to a file. Creates parent directories if needed. Overwrites existing content.',
  inputSchema: z.object({
    path: z.string().describe('File path to write to'),
    content: z.string().describe('Content to write'),
  }),
  outputSchema: z.object({ written: z.boolean(), path: z.string() }),
  permission: 'write',
  sideEffect: 'filesystem',
  timeout: 10000,
  retries: 0,
  tags: ['filesystem'],
  renderer: { icon: '✏️', color: 'yellow' },
  /**
   * Ensures the parent directory exists, then writes the file.
   *
   * @param input - Destination `path` and the `content` to write.
   * @param ctx - Execution context; only `workDir` is used here.
   * @returns `written: true` and the resolved path that was written.
   * @throws Rejects with the underlying `fs` error when the directory cannot
   *   be created or the file cannot be written.
   */
  async execute(input, ctx) {
    const filePath = resolve(ctx.workDir, input.path);
    // dirname() understands the platform's separator. Slicing at the last '/'
    // yields an empty string for Windows paths (which use '\'), and mkdir('')
    // then fails before anything is written.
    await mkdir(dirname(filePath), { recursive: true });
    await writeFile(filePath, input.content, 'utf-8');
    return { written: true, path: filePath };
  },
};
|
| 50 |
+
|
| 51 |
+
/**
 * Tool that lists the direct children of a directory, reporting each entry's
 * name, whether it is a directory or a file, and its size.
 *
 * `input.path` is resolved against `ctx.workDir`.
 */
export const listDirTool: ToolDef<{ path: string }, { entries: Array<{ name: string; type: string; size: number }> }> = {
  id: 'fs.list',
  name: 'list_directory',
  description: 'List files and directories at the given path with type and size information.',
  inputSchema: z.object({ path: z.string().describe('Directory path to list') }),
  outputSchema: z.object({ entries: z.array(z.object({ name: z.string(), type: z.string(), size: z.number() })) }),
  permission: 'read',
  sideEffect: 'none',
  timeout: 10000,
  retries: 0,
  tags: ['filesystem'],
  renderer: { icon: '📁', color: 'blue' },
  /**
   * Reads the directory and stats every entry concurrently.
   *
   * @param input - `path` of the directory to list.
   * @param ctx - Execution context; only `workDir` is used here.
   * @returns One record per directory entry, in the order `readdir` returned them.
   * @throws Rejects with the underlying `fs` error when the directory cannot be read.
   */
  async execute(input, ctx) {
    const target = resolve(ctx.workDir, input.path);
    const dirents = await readdir(target, { withFileTypes: true });

    const describeEntry = async (dirent: (typeof dirents)[number]) => {
      let size = 0;
      try {
        size = (await stat(join(target, dirent.name))).size;
      } catch {
        // Best effort: an entry that cannot be stat-ed is still listed, with size 0.
      }
      // Anything that is not a directory is reported as 'file'.
      const type = dirent.isDirectory() ? 'directory' : 'file';
      return { name: dirent.name, type, size };
    };

    return { entries: await Promise.all(dirents.map((dirent) => describeEntry(dirent))) };
  },
};
|
src/tools/index.ts
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// ─── Tools barrel export ─────────────────────────────────────────────────────
|
| 2 |
+
export { readFileTool, writeFileTool, listDirTool } from './fs/index.js';
|
| 3 |
+
export { shellExecTool } from './shell/index.js';
|
| 4 |
+
export { webFetchTool } from './web/index.js';
|
src/tools/shell/index.ts
ADDED
|
@@ -0,0 +1,50 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// ─── Shell Tool ─────────────────────────────────────────────────────────────
|
| 2 |
+
import { z } from 'zod';
|
| 3 |
+
import { exec } from 'child_process';
|
| 4 |
+
import { promisify } from 'util';
|
| 5 |
+
import type { ToolDef } from '../../core/tools/index.js';
|
| 6 |
+
|
| 7 |
+
const execAsync = promisify(exec);
|
| 8 |
+
|
| 9 |
+
/**
 * Tool that runs a shell command in `ctx.workDir` and returns its captured
 * output and exit code.
 *
 * A failing command does not reject: the failure is reported through
 * `stderr` and a non-zero `exitCode` so the caller can inspect it.
 */
export const shellExecTool: ToolDef<
  { command: string; timeout?: number },
  { stdout: string; stderr: string; exitCode: number }
> = {
  id: 'shell.exec',
  name: 'shell_exec',
  description: 'Execute a shell command and return stdout, stderr, and exit code. Use for running builds, tests, git commands, or any CLI tool.',
  inputSchema: z.object({
    command: z.string().describe('Shell command to execute'),
    timeout: z.number().optional().describe('Timeout in milliseconds (default: 30000)'),
  }),
  outputSchema: z.object({
    stdout: z.string(),
    stderr: z.string(),
    exitCode: z.number(),
  }),
  permission: 'exec',
  sideEffect: 'process',
  timeout: 60000,
  retries: 0,
  tags: ['shell', 'exec'],
  renderer: { icon: '⚡', color: 'green' },
  /**
   * Executes `input.command` and captures its output.
   *
   * @param input - The `command` to run and an optional `timeout` in
   *   milliseconds (30000 when omitted).
   * @param ctx - Execution context: `emit` for progress, `workDir` as the
   *   working directory, `signal` for cancellation.
   * @returns stdout truncated to 50000 characters, stderr truncated to 10000
   *   characters, and a numeric exit code (0 on success).
   */
  async execute(input, ctx) {
    ctx.emit(`Executing: ${input.command}`);
    const timeout = input.timeout ?? 30000;
    try {
      const { stdout, stderr } = await execAsync(input.command, {
        cwd: ctx.workDir,
        timeout,
        maxBuffer: 1024 * 1024 * 10, // 10MB
        signal: ctx.signal,
      });
      return { stdout: stdout.slice(0, 50000), stderr: stderr.slice(0, 10000), exitCode: 0 };
    } catch (err: unknown) {
      // The rejection is normally an Error carrying stdout/stderr/code, but
      // nothing guarantees that shape, so every field is narrowed before use.
      const failure = (err ?? {}) as {
        stdout?: unknown;
        stderr?: unknown;
        message?: unknown;
        code?: unknown;
      };
      const stdout = typeof failure.stdout === 'string' ? failure.stdout : '';
      const capturedStderr = typeof failure.stderr === 'string' ? failure.stderr : '';
      const message = typeof failure.message === 'string' ? failure.message : '';
      // Timeouts, aborts and spawn failures leave stderr empty; fall back to
      // the error message so the caller still learns why the command failed.
      const stderr = capturedStderr.length > 0 ? capturedStderr : message;
      // `code` is the numeric exit status for a command that ran and failed,
      // but a string (e.g. 'ENOENT', 'ABORT_ERR') for system-level errors and
      // null when the process was killed. Only a number satisfies the schema.
      const exitCode = typeof failure.code === 'number' ? failure.code : 1;
      return {
        stdout: stdout.slice(0, 50000),
        stderr: stderr.slice(0, 10000),
        exitCode,
      };
    }
  },
};
|
src/tools/web/index.ts
ADDED
|
@@ -0,0 +1,46 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// ─── Web Fetch Tool ─────────────────────────────────────────────────────────
|
| 2 |
+
import { z } from 'zod';
|
| 3 |
+
import type { ToolDef } from '../../core/tools/index.js';
|
| 4 |
+
|
| 5 |
+
/**
 * Tool that performs a single HTTP request with the global `fetch` and
 * returns the response status, headers, and a truncated text body.
 */
export const webFetchTool: ToolDef<
  { url: string; method?: string; headers?: Record<string, string>; body?: string },
  { status: number; headers: Record<string, string>; body: string }
> = {
  id: 'web.fetch',
  name: 'web_fetch',
  description: 'Make an HTTP request to a URL. Returns status, headers, and body (truncated to 100KB). Useful for reading docs, APIs, and web pages.',
  inputSchema: z.object({
    url: z.string().url().describe('URL to fetch'),
    method: z.string().optional().describe('HTTP method (default: GET)'),
    headers: z.record(z.string()).optional().describe('Request headers'),
    body: z.string().optional().describe('Request body for POST/PUT'),
  }),
  outputSchema: z.object({
    status: z.number(),
    headers: z.record(z.string()),
    body: z.string(),
  }),
  permission: 'network',
  sideEffect: 'network',
  timeout: 30000,
  retries: 1,
  tags: ['web', 'network', 'http'],
  renderer: { icon: '🌐', color: 'cyan' },
  /**
   * Sends the request and collects the response.
   *
   * @param input - Target `url` plus optional `method` (GET when omitted),
   *   request `headers`, and request `body`.
   * @param ctx - Execution context: `emit` for progress, `signal` for cancellation.
   * @returns The HTTP status, the response headers as a plain object, and the
   *   first 100000 characters of the response text.
   * @throws Rejects with whatever `fetch` or `Response.text()` rejects with;
   *   non-2xx statuses are returned, not thrown.
   */
  async execute(input, ctx) {
    ctx.emit(`Fetching: ${input.url}`);

    const response = await fetch(input.url, {
      method: input.method ?? 'GET',
      headers: input.headers,
      body: input.body,
      signal: ctx.signal,
    });

    // The full body is read first; truncation happens on the decoded text.
    const text = await response.text();

    // Flatten the Headers object into a plain record for the output schema.
    const responseHeaders: Record<string, string> = {};
    response.headers.forEach((headerValue, headerName) => {
      responseHeaders[headerName] = headerValue;
    });

    return {
      status: response.status,
      headers: responseHeaders,
      body: text.slice(0, 100_000),
    };
  },
};
|
tsconfig.json
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"compilerOptions": {
|
| 3 |
+
"target": "ES2022",
|
| 4 |
+
"module": "ESNext",
|
| 5 |
+
"moduleResolution": "bundler",
|
| 6 |
+
"lib": ["ES2022"],
|
| 7 |
+
"outDir": "./dist",
|
| 8 |
+
"rootDir": "./src",
|
| 9 |
+
"strict": true,
|
| 10 |
+
"esModuleInterop": true,
|
| 11 |
+
"skipLibCheck": true,
|
| 12 |
+
"forceConsistentCasingInFileNames": true,
|
| 13 |
+
"resolveJsonModule": true,
|
| 14 |
+
"declaration": true,
|
| 15 |
+
"declarationMap": true,
|
| 16 |
+
"sourceMap": true,
|
| 17 |
+
"noUncheckedIndexedAccess": true,
|
| 18 |
+
"noUnusedLocals": true,
|
| 19 |
+
"noUnusedParameters": true,
|
| 20 |
+
"exactOptionalPropertyTypes": false,
|
| 21 |
+
"paths": {
|
| 22 |
+
"@core/*": ["./src/core/*"],
|
| 23 |
+
"@providers/*": ["./src/providers/*"],
|
| 24 |
+
"@cli/*": ["./src/cli/*"],
|
| 25 |
+
"@tools/*": ["./src/tools/*"],
|
| 26 |
+
"@skills/*": ["./src/skills/*"]
|
| 27 |
+
}
|
| 28 |
+
},
|
| 29 |
+
"include": ["src/**/*.ts"],
|
| 30 |
+
"exclude": ["node_modules", "dist"]
|
| 31 |
+
}
|