stevenkhan committed on
Commit
908562b
·
verified ·
1 Parent(s): a008404

Initial AI Harness - production-grade model-agnostic CLI agent runtime

Browse files
EXTENSION_GUIDE.md ADDED
@@ -0,0 +1,206 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Extension Guide
2
+
3
+ This guide explains how to extend AI Harness with new providers, tools, skills, evaluator checks, and renderers.
4
+
5
+ ## Adding a New Provider
6
+
7
+ 1. Create `src/providers/your-provider/index.ts`
8
+ 2. Implement the `ProviderAdapter` interface:
9
+
10
+ ```typescript
11
+ import type { ProviderAdapter, ProviderConfig, ModelInfo, ModelCapability, ModelInvocation, ModelResult, ModelStreamEvent } from '../../core/provider/index.js';
12
+
13
+ export class YourProvider implements ProviderAdapter {
14
+ id = 'your-provider';
15
+ label = 'Your Provider';
16
+
17
+ constructor(config: ProviderConfig) {
18
+ // Store API key, base URL, etc.
19
+ }
20
+
21
+ async listModels(): Promise<ModelInfo[]> {
22
+ // Return available models with capabilities and pricing
23
+ }
24
+
25
+ supports(capability: ModelCapability): boolean {
26
+ // Return true for supported capabilities
27
+ }
28
+
29
+ async invoke(input: ModelInvocation): Promise<ModelResult> {
30
+ // Make a non-streaming request, return parsed result
31
+ }
32
+
33
+ async *stream(input: ModelInvocation): AsyncIterable<ModelStreamEvent> {
34
+ // Yield streaming events: text-delta, tool-call-start, tool-call-delta, tool-call-end, finish
35
+ }
36
+ }
37
+ ```
38
+
39
+ 3. Register in `src/cli/state/provider-resolver.ts`:
40
+
41
+ ```typescript
42
+ case 'your-provider':
43
+ return new YourProvider({ id: 'your-provider', ...config });
44
+ ```
45
+
46
+ ### Key requirements:
47
+ - Normalize all message formats to the common `Message` type
48
+ - Handle tool calling format differences internally
49
+ - Emit proper `TokenUsage` in results (even if estimated)
50
+ - Support `AbortSignal` for cancellation
51
+ - Handle rate limits and retries internally
52
+
53
+ ---
54
+
55
+ ## Adding a New Tool
56
+
57
+ 1. Create `src/tools/your-category/index.ts`
58
+ 2. Define the tool with full typing:
59
+
60
+ ```typescript
61
+ import { z } from 'zod';
62
+ import type { ToolDef } from '../../core/tools/index.js';
63
+
64
+ export const yourTool: ToolDef<
65
+ { param1: string; param2?: number },
66
+ { result: string }
67
+ > = {
68
+ id: 'category.action',
69
+ name: 'your_tool_name', // This is what the model sees
70
+ description: 'Clear, concise description of what this tool does.',
71
+ inputSchema: z.object({
72
+ param1: z.string().describe('What this parameter does'),
73
+ param2: z.number().optional().describe('Optional parameter'),
74
+ }),
75
+ outputSchema: z.object({
76
+ result: z.string(),
77
+ }),
78
+ permission: 'read', // read | write | exec | network | dangerous
79
+ sideEffect: 'none', // none | filesystem | network | process | mixed
80
+ timeout: 10000, // ms
81
+ retries: 1, // number of automatic retries on failure
82
+ tags: ['your-category'],
83
+ renderer: { icon: '🔧', color: 'blue' },
84
+ async execute(input, ctx) {
85
+ ctx.emit('Starting execution...'); // Progress updates
86
+ // Do the work
87
+ return { result: 'done' };
88
+ },
89
+ };
90
+ ```
91
+
92
+ 3. Register in the runtime setup (e.g., `src/cli/commands/run.ts`):
93
+
94
+ ```typescript
95
+ import { yourTool } from '../../tools/your-category/index.js';
96
+ tools.register(yourTool);
97
+ ```
98
+
99
+ ### Permission levels:
100
+ | Level | Meaning | Default policy |
101
+ |-------|---------|----------------|
102
+ | `read` | Only reads state | Auto-approved |
103
+ | `write` | Modifies files/state | Confirm in `confirm-writes` mode |
104
+ | `exec` | Runs processes | Confirm in `confirm-writes` mode |
105
+ | `network` | Makes network requests | Confirm in `confirm-network` mode |
106
+ | `dangerous` | Destructive/irreversible | Always requires approval |
107
+
108
+ ---
109
+
110
+ ## Adding a New Skill
111
+
112
+ 1. Create `src/skills/your-skill/index.ts`
113
+
114
+ ```typescript
115
+ import type { SkillModule } from '../../core/skills/index.js';
116
+
117
+ export const yourSkill: SkillModule = {
118
+ id: 'your-skill',
119
+ title: 'Your Skill Title',
120
+ description: 'One-line description.',
121
+ suggestedTools: ['tool_name_1', 'tool_name_2'],
122
+ tags: ['tag1', 'tag2'],
123
+ instructions: `Detailed multi-line instructions that will be injected into the system prompt when this skill is active.
124
+
125
+ ## Section 1
126
+ - Rule 1
127
+ - Rule 2
128
+
129
+ ## Section 2
130
+ - Rule 3`,
131
+ };
132
+ ```
133
+
134
+ 2. Register in the skill registry and export from `src/skills/index.ts`.
135
+
136
+ ### Tips:
137
+ - Keep instructions focused and actionable
138
+ - Reference specific tool names the model should use
139
+ - Include both "do" and "don't" rules
140
+ - Structure with markdown headings for readability
141
+
142
+ ---
143
+
144
+ ## Adding an Evaluator Check
145
+
146
+ ```typescript
147
+ import type { EvalCheck } from '../../core/evaluators/index.js';
148
+
149
+ export const yourCheck: EvalCheck = {
150
+ name: 'your-check-name',
151
+ async run(ctx) {
152
+ // ctx.goal — the original task goal
153
+ // ctx.assistantOutput — all assistant messages concatenated
154
+ // ctx.artifacts — generated artifacts
155
+ // ctx.workDir — working directory
156
+
157
+ const passed = /* your logic */;
158
+ return {
159
+ passed,
160
+ message: passed ? undefined : 'Explanation of what failed',
161
+ };
162
+ },
163
+ };
164
+
165
+ // Register:
166
+ evaluator.addCheck(yourCheck);
167
+ ```
168
+
169
+ ### Common check patterns:
170
+ - **Schema validation** — parse output with Zod
171
+ - **File existence** — verify expected files were created
172
+ - **Test execution** — run `npm test` and check exit code
173
+ - **Content matching** — verify output contains required elements
174
+ - **Length/quality** — check response isn't too short or repetitive
175
+
176
+ ---
177
+
178
+ ## Custom Renderer
179
+
180
+ The `EventRenderer` class in `src/cli/renderers/index.ts` handles all terminal output. To customize:
181
+
182
+ 1. Subclass or modify `EventRenderer`
183
+ 2. Add cases for event types you want to render differently
184
+ 3. Use the box-drawing utilities for structured output
185
+
186
+ ```typescript
187
+ class CustomRenderer extends EventRenderer {
188
+ override render(event: HarnessEvent): void {
189
+ if (event.type === 'your.custom.event') {
190
+ // Custom rendering
191
+ return;
192
+ }
193
+ super.render(event);
194
+ }
195
+ }
196
+ ```
197
+
198
+ ---
199
+
200
+ ## Adding Custom Events
201
+
202
+ 1. Add your event type to the `HarnessEvent` union in `src/core/events/index.ts`
203
+ 2. Emit it via `eventBus.emit({ type: 'your.event', ... })`
204
+ 3. Handle it in the renderer
205
+
206
+ The event system is intentionally open — any component can emit events, and any number of listeners can consume them.
README.md CHANGED
@@ -1,26 +1,142 @@
1
- ---
2
- tags:
3
- - ml-intern
4
- ---
5
 
6
- # stevenkhan/ai-harness
7
 
8
- <!-- ml-intern-provenance -->
9
- ## Generated by ML Intern
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
 
11
- This model repository was generated by [ML Intern](https://github.com/huggingface/ml-intern), an agent for machine learning research and development on the Hugging Face Hub.
12
 
13
- - Try ML Intern: https://smolagents-ml-intern.hf.space
14
- - Source code: https://github.com/huggingface/ml-intern
 
 
 
 
 
 
 
 
 
 
 
15
 
16
- ## Usage
17
 
18
- ```python
19
- from transformers import AutoModelForCausalLM, AutoTokenizer
 
 
 
 
 
 
20
 
21
- model_id = "stevenkhan/ai-harness"
22
- tokenizer = AutoTokenizer.from_pretrained(model_id)
23
- model = AutoModelForCausalLM.from_pretrained(model_id)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
  ```
25
 
26
- For non-causal architectures, replace `AutoModelForCausalLM` with the appropriate `AutoModel` class.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # AI Harness
 
 
 
2
 
3
+ A production-grade, model-agnostic CLI harness for agentic AI workflows.
4
 
5
+ ```
6
+ ╭─────────────────────────────────────╮
7
+ │ ⚡ AI Harness v0.1.0 │
8
+ │ model-agnostic CLI agent runtime │
9
+ ╰─────────────────────────────────────╯
10
+ ```
11
+
12
+ ## What is this?
13
+
14
+ A terminal-first agent runtime. Not a toy chatbot. It supports:
15
+
16
+ - **Multiple LLM providers** — OpenAI, Anthropic, Gemini, OpenRouter, any OpenAI-compatible endpoint
17
+ - **Typed tool calling** — Zod-validated inputs/outputs, permissions, retries, timeouts
18
+ - **Modular skills** — Attachable instruction packs per task
19
+ - **Structured runtime** — Planner/executor/evaluator roles, budgets, loop detection
20
+ - **Beautiful CLI output** — Streaming, spinners, panels, event timeline, metrics
21
+ - **Observability** — Token usage, cost tracking, latency, success rates
22
+ - **Evaluation** — Schema checks, rubric scoring, remediation loops
23
+ - **Artifact handling** — Files, patches, logs, export to Markdown/JSON
24
+ - **Safety & permissions** — Read/write/exec/network/dangerous levels with policy modes
25
+
26
+ ## Quick Start
27
+
28
+ ```bash
29
+ # Install dependencies
30
+ pnpm install
31
+
32
+ # Build
33
+ pnpm build
34
+
35
+ # Interactive chat
36
+ pnpm chat
37
+
38
+ # Autonomous task
39
+ node dist/cli/index.js run "refactor the auth module to use JWT"
40
+
41
+ # List providers/models
42
+ node dist/cli/index.js providers
43
+
44
+ # List tools
45
+ node dist/cli/index.js tools
46
+
47
+ # List skills
48
+ node dist/cli/index.js skills
49
+ ```
50
+
51
+ ## Configuration
52
 
53
+ Set provider API keys via environment variables:
54
 
55
+ ```bash
56
+ export OPENAI_API_KEY="sk-..."
57
+ export ANTHROPIC_API_KEY="sk-ant-..."
58
+ export GEMINI_API_KEY="AI..."
59
+ export OPENROUTER_API_KEY="sk-or-..."
60
+ ```
61
+
62
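+ Override defaults with CLI flags (`harness` below is shorthand for `node dist/cli/index.js`; `package.json` does not declare a `bin` entry):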
63
+
64
+ ```bash
65
+ harness chat --provider openai --model gpt-4o --skills coding research --verbose
66
+ harness run "build a REST API" --provider anthropic --model claude-sonnet-4-20250514 --budget-tokens 100000
67
+ ```
68
 
69
+ ## Commands
70
 
71
+ | Command | Description |
72
+ |---------|-------------|
73
+ | `harness chat` | Interactive multi-turn chat |
74
+ | `harness run <goal>` | Autonomous task execution |
75
+ | `harness providers` | List providers and models |
76
+ | `harness tools` | List available tools |
77
+ | `harness skills` | List available skills |
78
+ | `harness config` | Show configuration |
79
 
80
+ ## Architecture
81
+
82
+ ```
83
+ src/
84
+ core/
85
+ events/ — Event types, EventBus
86
+ provider/ — ProviderAdapter interface, message types
87
+ runtime/ — Session state, orchestration loop
88
+ tools/ — ToolRegistry, ToolDef, permissions
89
+ skills/ — SkillRegistry, SkillModule
90
+ evaluators/ — Evaluation checks, EvalReport
91
+ artifacts/ — ArtifactStore, export
92
+ policy/ — PolicyEngine, permission enforcement
93
+ observability/ — MetricsCollector, MetricEntry
94
+ providers/
95
+ openai/ — OpenAI adapter
96
+ anthropic/ — Anthropic adapter
97
+ gemini/ — Google Gemini adapter
98
+ openrouter/ — OpenRouter + OpenAI-compatible adapter
99
+ tools/
100
+ fs/ — read_file, write_file, list_directory
101
+ shell/ — shell_exec
102
+ web/ — web_fetch
103
+ skills/
104
+ coding/ — Software engineering instructions
105
+ research/ — Research & analysis instructions
106
+ docs/ — Technical writing instructions
107
+ cli/
108
+ index.ts — Commander entry point
109
+ commands/ — chat, run, providers, tools, skills, config
110
+ renderers/ — EventRenderer, Spinner, box drawing, metrics
111
+ state/ — Provider resolver, runtime factory
112
  ```
113
 
114
+ ## Key Design Decisions
115
+
116
+ ### Event-driven architecture
117
+ Everything flows through `EventBus`. Rendering, logging, metrics collection, and export all subscribe to the same event stream. This means you can add a new consumer (e.g., a web dashboard) without touching core logic.
118
+
119
+ ### Provider normalization
120
+ All providers implement `ProviderAdapter` with `invoke()` and `stream()`. Message format, tool calling conventions, and response parsing are handled per-provider so the runtime never sees vendor-specific shapes.
121
+
122
+ ### Typed tools with Zod
123
+ Every tool declares its input/output schemas with Zod. The runtime validates inputs before execution and can generate JSON Schema for model function-calling automatically.
124
+
125
+ ### Policy enforcement
126
+ The `PolicyEngine` checks permission levels against the current policy mode before executing any tool. Denied tools return structured error messages to the model so it can adapt.
127
+
128
+ ### Evaluation loop
129
+ After task completion, the `Evaluator` runs all registered checks. Failed checks can trigger remediation (retry with error context), preventing premature success declarations.
130
+
131
+ ## Extending
132
+
133
+ See [EXTENSION_GUIDE.md](./EXTENSION_GUIDE.md) for detailed instructions on adding:
134
+ - New providers
135
+ - New tools
136
+ - New skills
137
+ - New evaluator checks
138
+ - Custom renderers
139
+
140
+ ## License
141
+
142
+ MIT
package.json ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "name": "ai-harness",
3
+ "version": "0.1.0",
4
+ "private": true,
5
+ "type": "module",
6
+ "description": "Production-grade, model-agnostic CLI harness for agentic AI workflows",
7
+ "engines": { "node": ">=20.0.0" },
8
+ "scripts": {
9
+ "build": "tsc -b",
10
+ "dev": "tsc -b --watch",
11
+ "start": "node dist/cli/index.js",
12
+ "chat": "node dist/cli/index.js chat",
13
+ "run": "node dist/cli/index.js run",
14
+ "test": "vitest"
15
+ },
16
+ "dependencies": {
17
+ "zod": "^3.23.0",
18
+ "commander": "^12.1.0",
19
+ "chalk": "^5.3.0",
20
+ "ora": "^8.0.0",
21
+ "marked": "^12.0.0",
22
+ "marked-terminal": "^7.0.0",
23
+ "cli-table3": "^0.6.5",
24
+ "log-update": "^6.0.0",
25
+ "conf": "^13.0.0",
26
+ "better-sqlite3": "^11.0.0",
27
+ "nanoid": "^5.0.0",
28
+ "openai": "^4.52.0",
29
+ "@anthropic-ai/sdk": "^0.24.0",
30
+ "@google/generative-ai": "^0.12.0",
31
+ "eventsource-parser": "^1.1.0",
32
+ "undici": "^6.19.0",
33
+ "figures": "^6.1.0",
34
+ "boxen": "^8.0.0"
35
+ },
36
+ "devDependencies": {
37
+ "typescript": "^5.5.0",
38
+ "@types/node": "^20.14.0",
39
+ "@types/better-sqlite3": "^7.6.0",
40
+ "vitest": "^1.6.0"
41
+ }
42
+ }
src/cli/commands/chat.ts ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // ─── Chat Command ───────────────────────────────────────────────────────────
2
+ import { createInterface } from 'readline';
3
+ import { createRuntime } from '../state/factory.js';
4
+ import { EventRenderer } from '../renderers/index.js';
5
+ import type { Message } from '../../core/provider/index.js';
6
+ import { now, type HarnessEvent } from '../../core/events/index.js';
7
+
8
/**
 * Interactive multi-turn chat loop.
 *
 * Reads user input from stdin, streams the provider's reply to stdout, and
 * keeps the running conversation in memory. `/quit` or `/exit` ends the loop.
 *
 * @param opts.provider Provider id forwarded to `createRuntime`.
 * @param opts.model    Model id; when omitted the first model reported by the
 *                      provider is used.
 * @param opts.skills   Skill ids forwarded to `createRuntime`.
 * @param opts.verbose  Forwarded to the renderer.
 * @param opts.compact  Forwarded to the renderer.
 */
export async function chatCommand(opts: {
  provider: string;
  model?: string;
  skills: string[];
  verbose?: boolean;
  compact?: boolean;
}): Promise<void> {
  const renderer = new EventRenderer({ verbose: opts.verbose ?? false, compact: opts.compact ?? false });
  const { eventBus, provider } = await createRuntime(opts);

  eventBus.on((event) => renderer.render(event));

  // Resolve the model once up front: avoids a listModels() round-trip on every
  // turn, lets events report the real model id, and fails clearly when the
  // provider has no models instead of crashing on an undefined entry.
  let model: string;
  try {
    const resolved = opts.model ?? (await provider.listModels())[0]?.id;
    if (!resolved) {
      throw new Error(`provider "${provider.id}" reported no models; pass --model explicitly`);
    }
    model = resolved;
  } catch (err) {
    const errMsg = err instanceof Error ? err.message : String(err);
    console.error(`\x1b[31mError: ${errMsg}\x1b[0m`);
    process.exit(1);
  }

  const rl = createInterface({ input: process.stdin, output: process.stdout });
  const prompt = () => new Promise<string>((resolve) => rl.question('\x1b[36m❯\x1b[0m ', resolve));

  console.log(`\x1b[90mProvider: ${provider.label} | Type your message (Ctrl+C to exit)\x1b[0m\n`);

  const messages: Message[] = [{ role: 'system', content: 'You are a helpful AI assistant with access to tools.' }];

  while (true) {
    const input = await prompt();
    const trimmed = input.trim();
    if (!trimmed) continue;
    if (trimmed === '/quit' || trimmed === '/exit') break;

    messages.push({ role: 'user', content: input });

    eventBus.emit({ type: 'model.request.start', provider: provider.id, model, timestamp: now() });
    const startMs = Date.now();
    let answered = false;

    try {
      for await (const event of provider.stream({
        model,
        messages,
        // Chat mode sends no tool definitions to the provider.
        tools: undefined,
      })) {
        if (event.type === 'text-delta') {
          process.stdout.write(event.text);
        } else if (event.type === 'finish') {
          const durationMs = Date.now() - startMs;
          eventBus.emit({ type: 'model.request.end', provider: provider.id, model, usage: event.result.usage, durationMs, timestamp: now() });
          messages.push({ role: 'assistant', content: event.result.content });
          answered = true;
          if (!event.result.content.endsWith('\n')) process.stdout.write('\n');
        }
      }
    } catch (err) {
      // Drop the unanswered user turn so a retry does not send it twice.
      if (!answered) messages.pop();
      // NOTE(review): no 'model.request.end' is emitted on this path, so any
      // renderer state opened by 'model.request.start' stays open — confirm
      // whether an error event should be emitted here.
      const errMsg = err instanceof Error ? err.message : String(err);
      console.error(`\x1b[31mError: ${errMsg}\x1b[0m`);
    }
    console.log();
  }

  rl.close();
  console.log('\x1b[90mGoodbye.\x1b[0m');
  process.exit(0);
}
src/cli/commands/config.ts ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // ─── Config Command ──────────────────────────────────────────────────────────
2
+
3
/**
 * Prints the effective configuration as pretty-printed JSON.
 *
 * Provider, model, approval mode and budgets come from `HARNESS_*` environment
 * variables with fixed fallbacks; the remaining sections are static values.
 *
 * @param opts Parsed CLI options (currently not consulted).
 */
export async function configCommand(opts: { show?: boolean }): Promise<void> {
  const { env } = process;
  const fromEnv = (key: string, fallback: string): string => env[key] ?? fallback;

  const budget = {
    maxTokens: fromEnv('HARNESS_BUDGET_TOKENS', 'unlimited'),
    maxCostUsd: fromEnv('HARNESS_BUDGET_COST', 'unlimited'),
  };
  const tools = { filesystem: true, shell: true, webFetch: true };
  const observability = { level: 'standard', saveTraces: true };

  // Key order is part of the printed output, so it is kept as-is.
  const config = {
    provider: fromEnv('HARNESS_PROVIDER', 'anthropic'),
    model: fromEnv('HARNESS_MODEL', 'auto (first from provider)'),
    approvalMode: fromEnv('HARNESS_APPROVAL', 'confirm-writes'),
    budget,
    skills: ['coding', 'research', 'docs'],
    tools,
    observability,
  };

  const sections = [
    '\n\x1b[1mCurrent Configuration:\x1b[0m\n',
    JSON.stringify(config, null, 2),
    '\n\x1b[90mSet via environment variables (HARNESS_PROVIDER, HARNESS_MODEL, etc.) or --flags.\x1b[0m\n',
  ];
  for (const section of sections) {
    console.log(section);
  }
}
src/cli/commands/providers.ts ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // ─── Providers Command ──────────────────────────────────────────────────────
2
+ import { resolveProvider } from '../state/provider-resolver.js';
3
+
4
/**
 * Lists every known provider together with the models it reports.
 *
 * Each provider is resolved and queried independently; a provider that cannot
 * be resolved or queried is listed as "not configured" along with the reason,
 * and the remaining providers are still shown.
 */
export async function providersCommand(): Promise<void> {
  const providerIds = ['openai', 'anthropic', 'gemini', 'openrouter'];

  console.log('\n\x1b[1mAvailable Providers:\x1b[0m\n');

  for (const id of providerIds) {
    try {
      const provider = resolveProvider(id);
      const models = await provider.listModels();
      console.log(` \x1b[36m${provider.label}\x1b[0m (${id})`);
      for (const model of models) {
        const { costPerMillionInput: inCost, costPerMillionOutput: outCost } = model;
        // Compare against null/undefined rather than truthiness so that a
        // price of 0 is still reported as a price, and never print "$undefined".
        const cost = inCost != null && outCost != null ? `$${inCost}/M in, $${outCost}/M out` : 'pricing unknown';
        console.log(` • ${model.name} \x1b[90m(${model.id}) — ${(model.contextWindow / 1000).toFixed(0)}K ctx — ${cost}\x1b[0m`);
      }
      console.log();
    } catch (err) {
      // Surface the reason: a network failure or a bad key would otherwise be
      // indistinguishable from a provider that simply is not set up.
      const reason = err instanceof Error ? err.message : String(err);
      console.log(` \x1b[33m${id}\x1b[0m — not configured \x1b[90m(${reason})\x1b[0m\n`);
    }
  }
}
src/cli/commands/run.ts ADDED
@@ -0,0 +1,96 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // ─── Run Command ────────────────────────────────────────────────────────────
2
+ import { createRuntime } from '../state/factory.js';
3
+ import { EventRenderer, renderMetrics } from '../renderers/index.js';
4
+ import { Runtime, type RuntimeConfig } from '../../core/runtime/index.js';
5
+ import { EventBus } from '../../core/events/index.js';
6
+ import { ToolRegistry } from '../../core/tools/index.js';
7
+ import { SkillRegistry } from '../../core/skills/index.js';
8
+ import { PolicyEngine } from '../../core/policy/index.js';
9
+ import { MetricsCollector } from '../../core/observability/index.js';
10
+ import { ArtifactStore } from '../../core/artifacts/index.js';
11
+ import { Evaluator, outputNotEmptyCheck, hasArtifactsCheck } from '../../core/evaluators/index.js';
12
+ import { readFileTool, writeFileTool, listDirTool } from '../../tools/fs/index.js';
13
+ import { shellExecTool } from '../../tools/shell/index.js';
14
+ import { webFetchTool } from '../../tools/web/index.js';
15
+ import { codingSkill } from '../../skills/coding/index.js';
16
+ import { researchSkill } from '../../skills/research/index.js';
17
+ import { docsSkill } from '../../skills/docs/index.js';
18
+ import { resolveProvider } from '../state/provider-resolver.js';
19
+
20
/**
 * Parses a numeric CLI option, exiting with an error on invalid input.
 *
 * Returns `undefined` when the option is absent or empty. Otherwise the value
 * must parse to a finite number that is at least `min`.
 */
function parseNumericOption(
  flag: string,
  raw: string | undefined,
  parse: (text: string) => number,
  min: number,
): number | undefined {
  if (!raw) return undefined;
  const value = parse(raw);
  if (!Number.isFinite(value) || value < min) {
    console.error(`\x1b[31mError: ${flag} must be a number >= ${min}, got "${raw}"\x1b[0m`);
    process.exit(1);
  }
  return value;
}

/**
 * Runs a single autonomous task to completion and exits the process.
 *
 * Wires up the event bus, provider, tools, skills, policy, metrics, artifact
 * store and evaluator, runs the `Runtime` for `goal`, prints a metrics summary,
 * and exits with code 0 when the final status is `'completed'`, 1 otherwise.
 *
 * @param goal Natural-language task description handed to the runtime.
 * @param opts Parsed CLI options. Numeric options arrive as strings and are
 *             validated here; an invalid value terminates the process with
 *             exit code 1 before any model call is made.
 */
export async function runCommand(goal: string, opts: {
  provider: string;
  model?: string;
  skills: string[];
  maxTurns?: string;
  budgetTokens?: string;
  budgetCost?: string;
  approval?: string;
  verbose?: boolean;
  compact?: boolean;
}): Promise<void> {
  // Validate numeric flags first; an explicit radix keeps inputs such as
  // "0x10" from being read as hexadecimal.
  const toInt = (text: string): number => Number.parseInt(text, 10);
  const maxTurns = parseNumericOption('--max-turns', opts.maxTurns, toInt, 1) ?? 20;
  const budgetTokens = parseNumericOption('--budget-tokens', opts.budgetTokens, toInt, 0);
  const budgetCostUsd = parseNumericOption('--budget-cost', opts.budgetCost, Number.parseFloat, 0);

  const renderer = new EventRenderer({ verbose: opts.verbose ?? false, compact: opts.compact ?? false });

  // Setup
  const eventBus = new EventBus();
  eventBus.on((event) => renderer.render(event));

  const provider = resolveProvider(opts.provider);
  const model = opts.model ?? (await provider.listModels())[0]?.id;
  if (!model) {
    console.error(`\x1b[31mError: provider "${provider.id}" reported no models; pass --model explicitly\x1b[0m`);
    process.exit(1);
  }

  // Tools
  const tools = new ToolRegistry();
  tools.register(readFileTool);
  tools.register(writeFileTool);
  tools.register(listDirTool);
  tools.register(shellExecTool);
  tools.register(webFetchTool);

  // Skills
  const skills = new SkillRegistry();
  skills.register(codingSkill);
  skills.register(researchSkill);
  skills.register(docsSkill);

  // Policy
  // NOTE(review): the approval string is passed through unchecked; validate it
  // against the PolicyEngine's accepted modes once that type is in view.
  const policy = new PolicyEngine({
    mode: (opts.approval as any) ?? 'confirm-writes',
  });

  // Metrics
  const metrics = new MetricsCollector();

  // Artifacts
  const artifacts = new ArtifactStore();

  // Evaluator
  const evaluator = new Evaluator();
  evaluator.addCheck(outputNotEmptyCheck);

  // Runtime config
  const config: RuntimeConfig = {
    provider,
    model,
    tools,
    skills,
    policy,
    metrics,
    artifacts,
    evaluator,
    eventBus,
    systemPrompt: `You are an AI agent executing tasks autonomously. You have tools available. Complete the goal thoroughly, verify your work, and report results.`,
    activeSkills: opts.skills,
    maxTurns,
    budgetTokens,
    budgetCostUsd,
  };

  // Execute
  const runtime = new Runtime(config, goal);
  const state = await runtime.run();

  // Summary
  const summary = metrics.summarize(state.id);
  renderMetrics(summary);

  process.exit(state.status === 'completed' ? 0 : 1);
}
src/cli/commands/skills.ts ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // ─── Skills Command ──────────────────────────────────────────────────────────
2
+ import { codingSkill } from '../../skills/coding/index.js';
3
+ import { researchSkill } from '../../skills/research/index.js';
4
+ import { docsSkill } from '../../skills/docs/index.js';
5
+
6
/**
 * Prints the built-in skills: id, title, description and, when a skill
 * declares any, its suggested tool names.
 */
export async function skillsCommand(): Promise<void> {
  console.log('\n\x1b[1mAvailable Skills:\x1b[0m\n');

  [codingSkill, researchSkill, docsSkill].forEach((skill) => {
    const { id, title, description, suggestedTools } = skill;

    console.log(` \x1b[35m${id}\x1b[0m — ${title}`);
    console.log(` ${description}`);

    // The tools line is omitted for a missing or empty list.
    if (suggestedTools && suggestedTools.length > 0) {
      const toolList = suggestedTools.join(', ');
      console.log(` \x1b[90mTools: ${toolList}\x1b[0m`);
    }

    console.log();
  });
}
src/cli/commands/tools.ts ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // ─── Tools Command ──────────────────────────────────────────────────────────
2
+ import { ToolRegistry } from '../../core/tools/index.js';
3
+ import { readFileTool, writeFileTool, listDirTool } from '../../tools/fs/index.js';
4
+ import { shellExecTool } from '../../tools/shell/index.js';
5
+ import { webFetchTool } from '../../tools/web/index.js';
6
+
7
/**
 * Prints every built-in tool with its permission level, description, timeout,
 * retry count and side-effect class.
 */
export async function toolsCommand(): Promise<void> {
  // ANSI colour code per permission level; anything not listed falls back to
  // red ('31'), the same code 'exec' uses.
  const permissionColors = new Map<string, string>([
    ['read', '32'],
    ['write', '33'],
    ['exec', '31'],
    ['network', '35'],
  ]);

  const registry = new ToolRegistry();
  registry.register(readFileTool);
  registry.register(writeFileTool);
  registry.register(listDirTool);
  registry.register(shellExecTool);
  registry.register(webFetchTool);

  console.log('\n\x1b[1mAvailable Tools:\x1b[0m\n');

  for (const tool of registry.list()) {
    const permColor = permissionColors.get(tool.permission) ?? '31';
    const icon = tool.renderer?.icon ?? '🔧';

    console.log(` ${icon} \x1b[1m${tool.name}\x1b[0m \x1b[${permColor}m[${tool.permission}]\x1b[0m`);
    console.log(` ${tool.description}`);
    console.log(` \x1b[90mtimeout: ${tool.timeout}ms | retries: ${tool.retries} | side-effect: ${tool.sideEffect}\x1b[0m`);
    console.log();
  }
}
src/cli/index.ts ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // ─── CLI Entry Point ────────────────────────────────────────────────────────
2
+ import { Command } from 'commander';
3
+ import { chatCommand } from './commands/chat.js';
4
+ import { runCommand } from './commands/run.js';
5
+ import { providersCommand } from './commands/providers.js';
6
+ import { toolsCommand } from './commands/tools.js';
7
+ import { skillsCommand } from './commands/skills.js';
8
+ import { configCommand } from './commands/config.js';
9
+ import { renderBanner } from './renderers/index.js';
10
+
11
// Root command: registers every subcommand and its options, then parses argv.
const program = new Command();

program
  .name('harness')
  .description('Production-grade, model-agnostic AI agent CLI')
  .version('0.1.0');

program
  .command('chat')
  .description('Interactive chat with the AI agent')
  .option('-p, --provider <provider>', 'Provider to use', 'anthropic')
  .option('-m, --model <model>', 'Model to use')
  .option('-s, --skills <skills...>', 'Skills to load', ['coding'])
  .option('--verbose', 'Show detailed event information')
  .option('--compact', 'Minimal output mode')
  .action(chatCommand);

program
  .command('run <goal>')
  .description('Run an autonomous task with the given goal')
  .option('-p, --provider <provider>', 'Provider to use', 'anthropic')
  .option('-m, --model <model>', 'Model to use')
  .option('-s, --skills <skills...>', 'Skills to load', ['coding'])
  .option('--max-turns <n>', 'Maximum turns', '20')
  .option('--budget-tokens <n>', 'Token budget')
  .option('--budget-cost <n>', 'Cost budget in USD')
  .option('--approval <mode>', 'Approval mode', 'confirm-writes')
  .option('--verbose', 'Show detailed event information')
  .option('--compact', 'Minimal output mode')
  .action(runCommand);

program
  .command('providers')
  .description('List available providers and models')
  .action(providersCommand);

program
  .command('tools')
  .description('List available tools')
  .action(toolsCommand);

program
  .command('skills')
  .description('List available skills')
  .action(skillsCommand);

program
  .command('config')
  .description('Show or edit configuration')
  .option('--show', 'Show current config')
  .action(configCommand);

// Show banner and parse.
// Every action handler above is async, so parseAsync() is used: it waits for
// the handler and routes a rejection to the catch below instead of leaving it
// as an unhandled promise rejection.
renderBanner();
program.parseAsync().catch((err: unknown) => {
  const message = err instanceof Error ? err.message : String(err);
  console.error(`\x1b[31mError: ${message}\x1b[0m`);
  process.exit(1);
});
src/cli/renderers/index.ts ADDED
@@ -0,0 +1,208 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // ─── CLI Renderer ───────────────────────────────────────────────────────────
2
+ // Beautiful terminal output: streaming, panels, events, spinners, markdown.
3
+
4
+ import type { HarnessEvent } from '../../core/events/index.js';
5
+
6
// ─── ANSI color helpers (no dependency needed for basic colors) ──────────────
/** Builds the SGR escape sequence for `code`, e.g. `esc('31')` for red. */
const esc = (code: string) => `\x1b[${code}m`;
/** SGR sequence that clears all styling. */
const reset = esc('0');
const bold = (s: string) => `${esc('1')}${s}${reset}`;
const dim = (s: string) => `${esc('2')}${s}${reset}`;
const green = (s: string) => `${esc('32')}${s}${reset}`;
const yellow = (s: string) => `${esc('33')}${s}${reset}`;
const blue = (s: string) => `${esc('34')}${s}${reset}`;
const magenta = (s: string) => `${esc('35')}${s}${reset}`;
const cyan = (s: string) => `${esc('36')}${s}${reset}`;
const red = (s: string) => `${esc('31')}${s}${reset}`;
const gray = (s: string) => `${esc('90')}${s}${reset}`;

// ─── Box Drawing ────────────────────────────────────────────────────────────
const BOX = { tl: '╭', tr: '╮', bl: '╰', br: '╯', h: '─', v: '│' };

/**
 * Renders `content` inside a rounded box with `title` embedded in the top border.
 *
 * Every row is `width` columns wide, provided the title fits. Content lines are
 * clipped to the inner width (`width - 4`) by visible characters, so colour
 * codes neither consume width nor get cut in half.
 *
 * @param title   Text shown in the top border.
 * @param content Body text; split on `\n`, one row per line.
 * @param color   Styling applied to the border characters.
 * @param width   Total box width in columns, borders included.
 * @returns The box as a single newline-joined string.
 */
function box(title: string, content: string, color: (s: string) => string = cyan, width = 72): string {
  const innerW = Math.max(0, width - 4);
  const titleStr = ` ${title} `;
  // The top row is corner + one dash + title + fill + corner, while body and
  // bottom rows span innerW + 4 columns, so the fill needs innerW + 1 - title.
  const topLen = Math.max(0, innerW + 1 - stripAnsi(titleStr).length);
  const top = color(`${BOX.tl}${BOX.h}${titleStr}${'─'.repeat(topLen)}${BOX.tr}`);
  const bot = color(`${BOX.bl}${'─'.repeat(innerW + 2)}${BOX.br}`);
  const lines = content.split('\n').map((l) => {
    const clipped = truncateVisible(l, innerW);
    const pad = ' '.repeat(Math.max(0, innerW - stripAnsi(clipped).length));
    return `${color(BOX.v)} ${clipped}${pad} ${color(BOX.v)}`;
  });
  return [top, ...lines, bot].join('\n');
}

/**
 * Clips `s` to at most `max` visible characters. SGR sequences are copied
 * through without counting; when a styled string is cut, a reset is appended
 * so the styling cannot leak past the clipped text.
 */
function truncateVisible(s: string, max: number): string {
  if (stripAnsi(s).length <= max) return s;
  const sgrAt = /\x1b\[[0-9;]*m/y; // sticky: only matches exactly at lastIndex
  let out = '';
  let visible = 0;
  let sawEscape = false;
  let i = 0;
  while (i < s.length && visible < max) {
    sgrAt.lastIndex = i;
    const match = sgrAt.exec(s);
    if (match) {
      out += match[0];
      i += match[0].length;
      sawEscape = true;
      continue;
    }
    out += s[i];
    i += 1;
    visible += 1;
  }
  return sawEscape ? out + reset : out;
}

/** Removes SGR colour/style sequences, leaving only the visible text. */
function stripAnsi(s: string): string {
  return s.replace(/\x1b\[[0-9;]*m/g, '');
}
38
+
39
+ // ─── Spinner ────────────────────────────────────────────────────────────────
40
const SPINNER_FRAMES = ['⠋', '⠙', '⠹', '⠸', '⠼', '⠴', '⠦', '⠧', '⠇', '⠏'];

/**
 * Single-line terminal spinner that redraws itself on stdout every 80 ms.
 */
export class Spinner {
  private frame = 0;
  private interval: ReturnType<typeof setInterval> | null = null;
  private message = '';

  /**
   * Starts animating with `message`. Calling this while already running
   * replaces the running animation rather than adding a second timer.
   */
  start(message: string): void {
    // Without this, a second start() would overwrite the only handle to the
    // first timer and leave it running with no way to clear it.
    if (this.interval) clearInterval(this.interval);
    this.message = message;
    this.interval = setInterval(() => {
      this.frame = (this.frame + 1) % SPINNER_FRAMES.length;
      process.stdout.write(`\r${cyan(SPINNER_FRAMES[this.frame]!)} ${this.message}`);
    }, 80);
  }

  /** Changes the text shown on the next redraw. */
  update(message: string): void {
    this.message = message;
  }

  /**
   * Stops the animation and blanks the spinner line, then prints
   * `finalMessage` if one is given. Safe to call when not running.
   */
  stop(finalMessage?: string): void {
    const wasRunning = this.interval !== null;
    if (this.interval) clearInterval(this.interval);
    this.interval = null;
    // Only blank the line when the spinner may have drawn on it; otherwise
    // this would overwrite the start of whatever is on the current line.
    if (wasRunning) {
      process.stdout.write(`\r${' '.repeat(this.message.length + 4)}\r`);
    }
    if (finalMessage) console.log(finalMessage);
  }
}
66
+
67
+ // ─── Event Renderer ─────────────────────────────────────────────────────────
68
/** Display switches for {@link EventRenderer}. */
export interface RendererOptions {
  /** Also print per-request timing/token/cost lines, tool inputs and tool durations. */
  verbose: boolean;
  /** Suppress the plan box on `plan.updated` events. */
  compact: boolean;
}
72
+
73
/**
 * Renders harness events to the terminal.
 *
 * Streamed model text is written raw to stdout; everything else goes through
 * `console.log`. One spinner is shared by model requests and tool executions,
 * and this class tracks whether it is running so that it is always stopped
 * before other output is written to the same line.
 */
export class EventRenderer {
  private opts: RendererOptions;
  private spinner = new Spinner();
  /** True between a spinner start and the matching stop. */
  private spinnerActive = false;
  /** Text written for the current assistant message; used to decide on a trailing newline. */
  private streamBuffer = '';

  /** @param opts Overrides for the defaults `{ verbose: false, compact: false }`. */
  constructor(opts: Partial<RendererOptions> = {}) {
    this.opts = { verbose: false, compact: false, ...opts };
  }

  /** Writes the terminal representation of a single event. */
  render(event: HarnessEvent): void {
    switch (event.type) {
      case 'session.started':
        console.log('\n' + box('Session', `${bold('Goal:')} ${event.goal}\n${gray(`ID: ${event.sessionId}`)}`, cyan));
        break;

      case 'session.completed':
        this.stopSpinner();
        this.flushStream();
        console.log('\n' + green(`✓ ${bold('Session completed')} — ${event.summary}`));
        break;

      case 'session.failed':
        this.stopSpinner();
        this.flushStream();
        console.log('\n' + red(`✗ ${bold('Session failed')} — ${event.error}`));
        break;

      case 'plan.updated':
        if (!this.opts.compact) {
          const planStr = event.items.map((item) => {
            const icon = item.status === 'completed' ? green('✓') : item.status === 'in_progress' ? yellow('▶') : item.status === 'failed' ? red('✗') : gray('○');
            return ` ${icon} ${item.title}`;
          }).join('\n');
          console.log('\n' + box('Plan', planStr, magenta));
        }
        break;

      case 'model.request.start':
        this.startSpinner(`${event.provider}/${event.model} thinking…`);
        break;

      case 'model.request.end':
        this.stopSpinner();
        if (this.opts.verbose) {
          console.log(gray(` ⏱ ${event.durationMs}ms | ${event.usage.totalTokens} tokens | ~$${(event.usage.estimatedCostUsd ?? 0).toFixed(4)}`));
        }
        break;

      case 'model.stream.delta':
        // The spinner redraws from column 0 on a timer; left running it would
        // overwrite the text being streamed onto the same line.
        this.stopSpinner();
        this.streamBuffer += event.text;
        process.stdout.write(event.text);
        break;

      case 'model.stream.end':
        this.stopSpinner();
        // A non-streaming turn emits only this event, so with no deltas seen
        // the full text has to be printed here or it is never shown.
        if (!this.streamBuffer && event.fullText) {
          this.streamBuffer = event.fullText;
          process.stdout.write(event.fullText);
        }
        this.flushStream();
        break;

      case 'tool.requested':
        console.log(`\n${blue('⚡')} ${bold(event.toolCall.toolName)} ${gray(`[${event.toolCall.id.slice(0, 8)}]`)}`);
        if (this.opts.verbose) {
          console.log(gray(` Input: ${this.previewInput(event.toolCall.input)}`));
        }
        break;

      case 'tool.started':
        this.startSpinner('Tool executing…');
        break;

      case 'tool.progress':
        this.spinner.update(event.message);
        break;

      case 'tool.finished':
        this.stopSpinner(green(` ✓ Done`) + (this.opts.verbose ? gray(` (${event.durationMs}ms)`) : ''));
        break;

      case 'tool.failed':
        this.stopSpinner(red(` ✗ Failed: ${event.error}`));
        break;

      case 'tool.denied':
        console.log(yellow(` ⚠ Denied: ${event.reason}`));
        break;

      case 'evaluation.completed': {
        // Braces keep these declarations scoped to this case.
        const r = event.report;
        const icon = r.passed ? green('✓') : red('✗');
        const checksStr = r.checks.map((c) => ` ${c.passed ? green('✓') : red('✗')} ${c.name}${c.message ? gray(` — ${c.message}`) : ''}`).join('\n');
        // `summary` is optional in the report schema.
        console.log('\n' + box('Evaluation', `${icon} ${r.summary ?? ''}\n${checksStr}`, r.passed ? green : red));
        break;
      }

      case 'artifact.created':
        console.log(`${magenta('📎')} Artifact: ${bold(event.artifact.title)} ${gray(`(${event.artifact.type})`)}`);
        break;

      case 'budget.warning':
        console.log(yellow(`⚠ Budget warning: ${event.usage.totalTokens} tokens used`));
        break;

      case 'error':
        // A failed model call is reported as `error` with no `model.request.end`,
        // so the request spinner must be stopped here.
        this.stopSpinner();
        console.log(red(`✗ Error: ${event.message}`));
        break;

      case 'tool.approved':
      case 'evaluation.started':
        // Intentionally not rendered.
        break;
    }
  }

  /** Starts the spinner, stopping a still-running one first so its timer is not orphaned. */
  private startSpinner(message: string): void {
    if (this.spinnerActive) this.spinner.stop();
    this.spinner.start(message);
    this.spinnerActive = true;
  }

  /**
   * Stops the spinner if it is running. When it is not running, the line is
   * left untouched and `finalMessage` (if any) is simply printed.
   */
  private stopSpinner(finalMessage?: string): void {
    if (this.spinnerActive) {
      this.spinnerActive = false;
      this.spinner.stop(finalMessage);
    } else if (finalMessage) {
      console.log(finalMessage);
    }
  }

  /**
   * Returns at most 200 characters describing a tool input.
   * `JSON.stringify` yields `undefined` for `undefined`/functions and throws on
   * cycles or BigInt; both cases fall back to `String(input)`.
   */
  private previewInput(input: unknown): string {
    let json: string | undefined;
    try {
      json = JSON.stringify(input);
    } catch {
      json = undefined;
    }
    return (json ?? String(input)).slice(0, 200);
  }

  /** Ends the streamed message: emits a newline unless the text already ended with one. */
  private flushStream(): void {
    if (this.streamBuffer) {
      if (!this.streamBuffer.endsWith('\n')) process.stdout.write('\n');
      this.streamBuffer = '';
    }
  }
}
184
+
185
+ // ─── Header / Banner ────────────────────────────────────────────────────────
186
/**
 * Prints the startup banner: a four-row box with the product name/version and
 * the tagline, preceded and followed by a blank line.
 * The spacing inside the rows is literal text in the template.
 */
export function renderBanner(): void {
  const edge = cyan(bold('│'));
  const topRule = cyan(bold('╭─────────────────────────────────────╮'));
  const bottomRule = cyan(bold('╰─────────────────────────────────────╯'));
  const banner = `
${topRule}
${edge} ${bold('⚡ AI Harness')} ${gray('v0.1.0')} ${edge}
${edge} ${dim('model-agnostic CLI agent runtime')} ${edge}
${bottomRule}
`;
  console.log(banner);
}
194
+
195
+ // ─── Metrics Summary ────────────────────────────────────────────────────────
196
/**
 * Prints a "Metrics" box summarizing a session.
 *
 * @param metrics Counters to display. `toolSuccessRate` is a 0–1 fraction shown
 *                as a rounded percentage; `totalDurationMs` is shown in seconds
 *                with one decimal; cost is shown with four decimals.
 */
export function renderMetrics(metrics: {
  modelCalls: number; toolCalls: number; totalTokens: number;
  estimatedCostUsd: number; totalDurationMs: number; toolSuccessRate: number;
}): void {
  const successPct = Math.round(metrics.toolSuccessRate * 100);
  const seconds = (metrics.totalDurationMs / 1000).toFixed(1);

  const rows: string[] = [];
  rows.push(`${bold('Model calls:')} ${metrics.modelCalls}`);
  rows.push(`${bold('Tool calls:')} ${metrics.toolCalls} (${successPct}% success)`);
  rows.push(`${bold('Total tokens:')} ${metrics.totalTokens.toLocaleString()}`);
  rows.push(`${bold('Est. cost:')} $${metrics.estimatedCostUsd.toFixed(4)}`);
  rows.push(`${bold('Duration:')} ${seconds}s`);

  console.log('\n' + box('Metrics', rows.join('\n'), gray));
}
src/cli/state/factory.ts ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // ─── Runtime Factory ─────────────────────────────────────────────────────────
2
+ import { EventBus } from '../../core/events/index.js';
3
+ import type { ProviderAdapter } from '../../core/provider/index.js';
4
+ import { resolveProvider } from './provider-resolver.js';
5
+
6
/** Options accepted by `createRuntime`. */
export interface CreateRuntimeOpts {
  /** Provider id handed to `resolveProvider`. */
  provider: string;
  /** Model id; when omitted the first model listed by the provider is used. */
  model?: string;
  /** Skill ids. NOTE(review): not read by `createRuntime` in this file. */
  skills: string[];
  /** NOTE(review): not read by `createRuntime` in this file. */
  verbose?: boolean;
  /** NOTE(review): not read by `createRuntime` in this file. */
  compact?: boolean;
}
13
+
14
/**
 * Builds the pieces used by chat mode: a fresh event bus, the resolved
 * provider, and the model id to use.
 *
 * The model is `opts.model` when given; otherwise the provider's models are
 * listed and the first one's id is used, or the literal 'unknown' if the list
 * is empty.
 *
 * @returns `{ runtime: null, eventBus, provider, model }` — `runtime` is always null here.
 */
export async function createRuntime(opts: CreateRuntimeOpts) {
  const eventBus = new EventBus();
  const provider = resolveProvider(opts.provider);

  let model: string;
  if (opts.model != null) {
    model = opts.model;
  } else {
    const available = await provider.listModels();
    model = available[0]?.id ?? 'unknown';
  }

  return {
    runtime: null, // Chat mode doesn't use full runtime
    eventBus,
    provider,
    model,
  };
}
src/cli/state/provider-resolver.ts ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // ─── Provider Resolver ───────────────────────────────────────────────────────
2
+ import type { ProviderAdapter } from '../../core/provider/index.js';
3
+ import { OpenAIProvider } from '../../providers/openai/index.js';
4
+ import { AnthropicProvider } from '../../providers/anthropic/index.js';
5
+ import { GeminiProvider } from '../../providers/gemini/index.js';
6
+ import { OpenAICompatibleProvider, OpenRouterProvider } from '../../providers/openrouter/index.js';
7
+
8
/**
 * Maps a provider id to a provider adapter instance.
 *
 * Known ids ('openai', 'anthropic', 'gemini', 'openrouter') get their dedicated
 * adapter. Any other id is treated as an OpenAI-compatible endpoint whose base
 * URL and API key come from `config` or, failing that, from the environment
 * variables `<ID>_BASE_URL` / `<ID>_API_KEY`.
 *
 * The `<ID>` prefix is the upper-cased id with every character outside
 * `[A-Z0-9_]` replaced by `_` (so `my-provider` reads `MY_PROVIDER_API_KEY`),
 * because names containing `-` or `.` cannot be exported from a POSIX shell.
 * The unsanitized upper-cased name is still consulted as a fallback.
 *
 * @param id     Provider id.
 * @param config Optional explicit credentials/endpoint; takes precedence over the environment.
 */
export function resolveProvider(id: string, config?: { apiKey?: string; baseUrl?: string }): ProviderAdapter {
  switch (id) {
    case 'openai':
      return new OpenAIProvider({ id: 'openai', ...config });
    case 'anthropic':
      return new AnthropicProvider({ id: 'anthropic', ...config });
    case 'gemini':
      return new GeminiProvider({ id: 'gemini', ...config });
    case 'openrouter':
      return new OpenRouterProvider({ ...config });
    default: {
      // Treat as OpenAI-compatible endpoint
      const legacyPrefix = id.toUpperCase();
      const envPrefix = legacyPrefix.replace(/[^A-Z0-9_]/g, '_');
      const fromEnv = (suffix: string): string | undefined =>
        process.env[`${envPrefix}_${suffix}`] ?? process.env[`${legacyPrefix}_${suffix}`];

      // NOTE(review): the `as any` cast is kept from the original; the constructor's
      // config type is not visible here, so it cannot be tightened safely.
      return new OpenAICompatibleProvider({
        id,
        label: id,
        baseUrl: config?.baseUrl ?? fromEnv('BASE_URL'),
        apiKey: config?.apiKey ?? fromEnv('API_KEY'),
      } as any);
    }
  }
}
src/core/artifacts/index.ts ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // ─── Artifacts ──────────────────────────────────────────────────────────────
2
+ // First-class artifact persistence and retrieval.
3
+
4
+ import type { ArtifactRecord } from '../events/index.js';
5
+
6
/**
 * In-memory, insertion-ordered collection of artifact records with Markdown
 * and JSON export.
 */
export class ArtifactStore {
  private artifacts: ArtifactRecord[] = [];

  /** Appends an artifact. Ids are not checked for uniqueness. */
  add(artifact: ArtifactRecord): void {
    this.artifacts.push(artifact);
  }

  /** Returns the first stored artifact whose `id` matches, or `undefined`. */
  get(id: string): ArtifactRecord | undefined {
    return this.artifacts.find((a) => a.id === id);
  }

  /** Returns a shallow copy of the stored artifacts in insertion order. */
  list(): ArtifactRecord[] {
    return [...this.artifacts];
  }

  /** Returns the artifacts whose `type` equals `type`, in insertion order. */
  listByType(type: ArtifactRecord['type']): ArtifactRecord[] {
    return this.artifacts.filter((a) => a.type === type);
  }

  /**
   * Renders all artifacts as a Markdown document.
   *
   * Each artifact gets a heading, its type and creation time, its path (if
   * any) and up to the first 2000 characters of its content in a fenced code
   * block. The fence is made longer than any backtick run in the excerpt so
   * content that itself contains ``` fences cannot close the block early.
   */
  exportMarkdown(): string {
    if (!this.artifacts.length) return '# Artifacts\n\nNo artifacts generated.\n';
    let md = '# Artifacts\n\n';
    for (const a of this.artifacts) {
      md += `## ${a.title}\n\n`;
      md += `- **Type:** ${a.type}\n`;
      md += `- **Created:** ${a.createdAt}\n`;
      if (a.path) md += `- **Path:** \`${a.path}\`\n`;
      if (a.content) {
        const excerpt = a.content.slice(0, 2000);
        const fence = ArtifactStore.fenceFor(excerpt);
        md += `\n${fence}\n${excerpt}\n${fence}\n`;
      }
      md += '\n---\n\n';
    }
    return md;
  }

  /** Serializes the artifact array as pretty-printed (2-space) JSON. */
  exportJson(): string {
    return JSON.stringify(this.artifacts, null, 2);
  }

  /** Removes all artifacts. */
  clear(): void {
    this.artifacts = [];
  }

  /** Returns a backtick fence (minimum three) strictly longer than any backtick run in `text`. */
  private static fenceFor(text: string): string {
    const runs = text.match(/`+/g) ?? [];
    const longest = runs.reduce((max, run) => Math.max(max, run.length), 0);
    return '`'.repeat(Math.max(3, longest + 1));
  }
}
src/core/evaluators/index.ts ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // ─── Evaluators ─────────────────────────────────────────────────────────────
2
+ // Structured evaluation hooks: schema checks, tests, rubric scoring.
3
+
4
+ import type { EvaluationReport } from '../events/index.js';
5
+
6
/** A single named evaluation check. */
export interface EvalCheck {
  /** Name reported alongside the check's result. */
  name: string;
  /**
   * Runs the check against the session context.
   * @returns Whether the check passed, with an optional explanatory message.
   */
  run(context: EvalContext): Promise<{ passed: boolean; message?: string }>;
}
10
+
11
/** Session data handed to every {@link EvalCheck}. */
export interface EvalContext {
  /** Id of the session being evaluated. */
  sessionId: string;
  /** The session's goal text. */
  goal: string;
  /** Artifacts produced during the session, reduced to path, content and type. */
  artifacts: Array<{ path?: string; content?: string; type: string }>;
  /** Assistant output text for the session. */
  assistantOutput: string;
  /** Working directory associated with the session. */
  workDir: string;
}
18
+
19
/**
 * Runs registered checks one after another and folds their outcomes into an
 * evaluation report.
 */
export class Evaluator {
  private checks: EvalCheck[] = [];

  /** Registers a check; checks run in registration order. */
  addCheck(check: EvalCheck): void {
    this.checks.push(check);
  }

  /**
   * Runs every check sequentially against `ctx`.
   *
   * A check that throws or rejects is recorded as failed with a
   * "Check threw: …" message rather than aborting the evaluation.
   *
   * @returns A report where `passed` is true only if every check passed and
   *          `score` is the passing fraction (1 when there are no checks).
   */
  async evaluate(ctx: EvalContext): Promise<EvaluationReport> {
    const outcomes: Array<{ name: string; passed: boolean; message?: string }> = [];
    let failures = 0;

    for (const check of this.checks) {
      const outcome = await this.runOne(check, ctx);
      if (!outcome.passed) failures += 1;
      outcomes.push(outcome);
    }

    const total = outcomes.length;
    const passed = failures === 0;
    const score = total ? (total - failures) / total : 1;
    const summary = passed
      ? `All ${total} checks passed.`
      : `${failures}/${total} checks failed.`;

    return { passed, score, checks: outcomes, summary };
  }

  /** Runs one check, converting a thrown error or rejection into a failed outcome. */
  private async runOne(
    check: EvalCheck,
    ctx: EvalContext,
  ): Promise<{ name: string; passed: boolean; message?: string }> {
    try {
      const result = await check.run(ctx);
      return { name: check.name, ...result };
    } catch (err) {
      const detail = err instanceof Error ? err.message : String(err);
      return { name: check.name, passed: false, message: `Check threw: ${detail}` };
    }
  }
}
55
+
56
+ // ─── Built-in Checks ────────────────────────────────────────────────────────
57
/** Passes when the assistant output contains at least one non-whitespace character. */
export const outputNotEmptyCheck: EvalCheck = {
  name: 'output-not-empty',
  async run(ctx) {
    const hasText = ctx.assistantOutput.trim().length > 0;
    if (hasText) {
      return { passed: true, message: undefined };
    }
    return { passed: false, message: 'Assistant output is empty.' };
  },
};
66
+
67
/** Passes when the context lists at least one artifact. */
export const hasArtifactsCheck: EvalCheck = {
  name: 'has-artifacts',
  async run(ctx) {
    const produced = ctx.artifacts.length > 0;
    if (produced) {
      return { passed: true, message: undefined };
    }
    return { passed: false, message: 'No artifacts were produced.' };
  },
};
src/core/events/index.ts ADDED
@@ -0,0 +1,129 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // ─── Core Event Types ───────────────────────────────────────────────────────
2
+ // The internal event model is the backbone of the harness. Rendering, logging,
3
+ // replay, export, and debugging all consume the same event stream.
4
+
5
+ import { z } from 'zod';
6
+
7
+ // ─── Plan ───────────────────────────────────────────────────────────────────
8
/** Lifecycle states a plan item can be in. */
const PLAN_ITEM_STATUSES = ['pending', 'in_progress', 'completed', 'failed', 'skipped'] as const;

/** Schema for one step of a session plan. */
export const PlanItemSchema = z.object({
  id: z.string(),
  title: z.string(),
  status: z.enum(PLAN_ITEM_STATUSES),
  /** Optional free-form detail text. */
  detail: z.string().optional(),
});

/** A plan step, as validated by {@link PlanItemSchema}. */
export type PlanItem = z.infer<typeof PlanItemSchema>;
15
+
16
+ // ─── Tool Call Record ───────────────────────────────────────────────────────
17
/** States a tool call record can be in. */
const TOOL_CALL_STATUSES = [
  'queued',
  'awaiting-approval',
  'running',
  'streaming',
  'success',
  'failed',
  'denied',
  'cancelled',
] as const;

/** Schema for the record of a single tool call: identity, input, status and outcome. */
export const ToolCallRecordSchema = z.object({
  id: z.string(),
  toolId: z.string(),
  toolName: z.string(),
  /** Tool input; unvalidated here (`unknown`). */
  input: z.unknown(),
  status: z.enum(TOOL_CALL_STATUSES),
  /** Tool output; unvalidated here (`unknown`). */
  output: z.unknown().optional(),
  error: z.string().optional(),
  durationMs: z.number().optional(),
  startedAt: z.string().optional(),
  finishedAt: z.string().optional(),
});

/** A tool call record, as validated by {@link ToolCallRecordSchema}. */
export type ToolCallRecord = z.infer<typeof ToolCallRecordSchema>;
39
+
40
+ // ─── Artifact Record ────────────────────────────────────────────────────────
41
/** Kinds of artifact a session can produce. */
const ARTIFACT_TYPES = ['file', 'patch', 'log', 'screenshot', 'json', 'markdown', 'other'] as const;

/** Schema for a produced artifact; `path`, `content` and `mimeType` are optional. */
export const ArtifactRecordSchema = z.object({
  id: z.string(),
  type: z.enum(ARTIFACT_TYPES),
  path: z.string().optional(),
  title: z.string(),
  content: z.string().optional(),
  mimeType: z.string().optional(),
  createdAt: z.string(),
});

/** An artifact record, as validated by {@link ArtifactRecordSchema}. */
export type ArtifactRecord = z.infer<typeof ArtifactRecordSchema>;
51
+
52
+ // ─── Evaluation Report ──────────────────────────────────────────────────────
53
/** Schema for the result of one evaluation check inside a report. */
const EvaluationCheckResultSchema = z.object({
  name: z.string(),
  passed: z.boolean(),
  message: z.string().optional(),
});

/** Schema for an evaluation report: overall verdict, optional 0–1 score, per-check results. */
export const EvaluationReportSchema = z.object({
  passed: z.boolean(),
  /** Optional score constrained to the range [0, 1]. */
  score: z.number().min(0).max(1).optional(),
  checks: z.array(EvaluationCheckResultSchema),
  summary: z.string().optional(),
});

/** An evaluation report, as validated by {@link EvaluationReportSchema}. */
export type EvaluationReport = z.infer<typeof EvaluationReportSchema>;
64
+
65
+ // ─── Token Usage ────────────────────────────────────────────────────────────
66
/** Schema for token counts of a model call, with an optional cost estimate in USD. */
export const TokenUsageSchema = z.object({
  promptTokens: z.number(),
  completionTokens: z.number(),
  totalTokens: z.number(),
  /** Optional; consumers in this codebase treat a missing value as 0. */
  estimatedCostUsd: z.number().optional(),
});

/** Token usage, as validated by {@link TokenUsageSchema}. */
export type TokenUsage = z.infer<typeof TokenUsageSchema>;
73
+
74
+ // ─── Harness Events (discriminated union) ───────────────────────────────────
75
/**
 * Every event that can travel over the event bus, discriminated by `type`.
 * All variants carry a `timestamp` string.
 */
export type HarnessEvent =
  // Session lifecycle
  | { type: 'session.started'; sessionId: string; goal: string; timestamp: string }
  | { type: 'session.completed'; sessionId: string; summary: string; timestamp: string }
  | { type: 'session.failed'; sessionId: string; error: string; timestamp: string }
  // Planning
  | { type: 'plan.updated'; items: PlanItem[]; timestamp: string }
  // Model requests and streamed output
  | { type: 'model.request.start'; provider: string; model: string; timestamp: string }
  | { type: 'model.request.end'; provider: string; model: string; usage: TokenUsage; durationMs: number; timestamp: string }
  | { type: 'model.stream.delta'; text: string; timestamp: string }
  | { type: 'model.stream.end'; fullText: string; timestamp: string }
  // Tool calls
  | { type: 'tool.requested'; toolCall: ToolCallRecord; timestamp: string }
  | { type: 'tool.approved'; toolCallId: string; timestamp: string }
  | { type: 'tool.denied'; toolCallId: string; reason: string; timestamp: string }
  | { type: 'tool.started'; toolCallId: string; timestamp: string }
  | { type: 'tool.progress'; toolCallId: string; message: string; timestamp: string }
  | { type: 'tool.finished'; toolCallId: string; result: unknown; durationMs: number; timestamp: string }
  | { type: 'tool.failed'; toolCallId: string; error: string; durationMs: number; timestamp: string }
  // Evaluation
  | { type: 'evaluation.started'; timestamp: string }
  | { type: 'evaluation.completed'; report: EvaluationReport; timestamp: string }
  // Artifacts, budget, errors
  | { type: 'artifact.created'; artifact: ArtifactRecord; timestamp: string }
  | { type: 'budget.warning'; usage: TokenUsage; limit: number; timestamp: string }
  | { type: 'error'; message: string; code?: string; timestamp: string };
96
+
97
+ // ─── Event Bus ─────────────────────────────────────────────────────────────
98
/** Callback invoked synchronously for every emitted event. */
export type EventListener = (event: HarnessEvent) => void;

/**
 * Synchronous publish/subscribe hub that also records every emitted event.
 */
export class EventBus {
  private listeners: EventListener[] = [];
  private history: HarnessEvent[] = [];

  /**
   * Subscribes `listener` to all future events.
   * @returns A function that removes every registration of this listener.
   */
  on(listener: EventListener): () => void {
    this.listeners.push(listener);
    const unsubscribe = (): void => {
      this.listeners = this.listeners.filter((registered) => registered !== listener);
    };
    return unsubscribe;
  }

  /**
   * Records `event` in the history, then delivers it to each listener in
   * registration order. An exception thrown by a listener is discarded so
   * that the remaining listeners still receive the event.
   */
  emit(event: HarnessEvent): void {
    this.history.push(event);
    for (const deliver of this.listeners) {
      try {
        deliver(event);
      } catch {
        /* swallow listener errors */
      }
    }
  }

  /** Returns the recorded events (the live internal array, typed read-only). */
  getHistory(): readonly HarnessEvent[] {
    return this.history;
  }

  /** Drops the recorded history; listeners stay subscribed. */
  clear(): void {
    this.history = [];
  }
}
126
+
127
/** Returns the current time as an ISO-8601 UTC string (`Date.prototype.toISOString`). */
export function now(): string {
  const current = new Date();
  return current.toISOString();
}
src/core/index.ts ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ // ─── Core barrel export ─────────────────────────────────────────────────────
2
+ export * from './events/index.js';
3
+ export * from './provider/index.js';
4
+ export * from './tools/index.js';
5
+ export * from './skills/index.js';
6
+ export * from './policy/index.js';
7
+ export * from './observability/index.js';
8
+ export * from './artifacts/index.js';
9
+ export * from './evaluators/index.js';
10
+ export * from './runtime/index.js';
src/core/observability/index.ts ADDED
@@ -0,0 +1,82 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // ─── Observability ──────────────────────────────────────────────────────────
2
+ // Captures metrics for model calls, tool calls, retries, token usage, and cost.
3
+
4
+ import type { TokenUsage } from '../events/index.js';
5
+
6
/** One recorded measurement. */
export interface MetricEntry {
  /** When the entry was recorded. */
  timestamp: string;
  /** What was measured. */
  type: 'model-call' | 'tool-call' | 'retry' | 'error';
  provider?: string;
  model?: string;
  toolName?: string;
  /** Duration in milliseconds. */
  durationMs: number;
  /** Token usage; only summed for 'model-call' entries. */
  usage?: TokenUsage;
  success: boolean;
  error?: string;
}
17
+
18
/** Aggregated metrics for one session, as produced by `MetricsCollector.summarize`. */
export interface SessionMetrics {
  sessionId: string;
  /** ISO timestamp of when collection started. */
  startedAt: string;
  endedAt?: string;
  /** Milliseconds elapsed between collection start and the summary. */
  totalDurationMs: number;
  modelCalls: number;
  toolCalls: number;
  /** Fraction (0–1) of tool calls that succeeded; 1 when there were none. */
  toolSuccessRate: number;
  retryCount: number;
  totalTokens: number;
  promptTokens: number;
  completionTokens: number;
  estimatedCostUsd: number;
  /** Not computed by the collector (it always reports 0); filled in externally. */
  evaluationPassRate: number;
}
33
+
34
/**
 * Accumulates metric entries for a session and summarizes them on demand.
 * The session clock starts at construction and restarts on `reset()`.
 */
export class MetricsCollector {
  private entries: MetricEntry[] = [];
  private sessionStart: number = Date.now();

  /** Appends one entry. */
  record(entry: MetricEntry): void {
    this.entries.push(entry);
  }

  /** Returns the recorded entries (the live internal array, typed read-only). */
  getEntries(): readonly MetricEntry[] {
    return this.entries;
  }

  /**
   * Aggregates everything recorded so far.
   *
   * Token and cost totals come from 'model-call' entries that carry `usage`
   * (a missing cost counts as 0). `toolSuccessRate` is 1 when no tool calls
   * were recorded. `evaluationPassRate` is always 0 here; it is computed
   * externally.
   */
  summarize(sessionId: string): SessionMetrics {
    let modelCalls = 0;
    let toolCalls = 0;
    let toolSuccesses = 0;
    let retryCount = 0;
    let totalTokens = 0;
    let promptTokens = 0;
    let completionTokens = 0;
    let estimatedCostUsd = 0;

    // Single pass over the entries, bucketed by entry type.
    for (const entry of this.entries) {
      switch (entry.type) {
        case 'model-call':
          modelCalls += 1;
          if (entry.usage) {
            totalTokens += entry.usage.totalTokens;
            promptTokens += entry.usage.promptTokens;
            completionTokens += entry.usage.completionTokens;
            estimatedCostUsd += entry.usage.estimatedCostUsd ?? 0;
          }
          break;
        case 'tool-call':
          toolCalls += 1;
          if (entry.success) toolSuccesses += 1;
          break;
        case 'retry':
          retryCount += 1;
          break;
        default:
          break;
      }
    }

    return {
      sessionId,
      startedAt: new Date(this.sessionStart).toISOString(),
      totalDurationMs: Date.now() - this.sessionStart,
      modelCalls,
      toolCalls,
      toolSuccessRate: toolCalls ? toolSuccesses / toolCalls : 1,
      retryCount,
      totalTokens,
      promptTokens,
      completionTokens,
      estimatedCostUsd,
      evaluationPassRate: 0, // computed externally
    };
  }

  /** Discards all entries and restarts the session clock. */
  reset(): void {
    this.entries = [];
    this.sessionStart = Date.now();
  }
}
src/core/policy/index.ts ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // ─── Policy & Permissions ───────────────────────────────────────────────────
2
+ // Enforces confirmation rules for tool execution based on permission levels.
3
+
4
+ import type { PermissionLevel } from '../tools/index.js';
5
+
6
/**
 * Approval policy modes, as applied by `PolicyEngine.requiresApproval`:
 * - 'auto': only 'dangerous' permissions need approval.
 * - 'confirm-writes': 'write', 'exec', 'network' and 'dangerous' need approval.
 * - 'confirm-network': 'network' and 'dangerous' need approval.
 * - 'manual-all' / 'locked-down': everything needs approval.
 */
export type PolicyMode = 'auto' | 'confirm-writes' | 'confirm-network' | 'manual-all' | 'locked-down';
12
+
13
/** Configuration for {@link PolicyEngine}. */
export interface PolicyConfig {
  /** Approval mode applied after the allow/block lists. */
  mode: PolicyMode;
  /** When set, any tool not named here requires approval. */
  allowedTools?: string[];
  /** Tools named here always require approval and are reported as blocked. */
  blockedTools?: string[];
  /** Cost ceiling in USD; unset or 0 means no cost limit is enforced. */
  maxCostUsd?: number;
  /** Token ceiling; unset or 0 means no token limit is enforced. */
  maxTokens?: number;
}
20
+
21
/**
 * Decides whether a tool call needs approval and whether the session is still
 * within its token/cost budget, based on a {@link PolicyConfig}.
 */
export class PolicyEngine {
  /** Permission levels that need approval in 'confirm-writes' mode. */
  private static readonly CONFIRM_WRITES_LEVELS: readonly string[] = ['write', 'exec', 'network', 'dangerous'];
  /** Permission levels that need approval in 'confirm-network' mode. */
  private static readonly CONFIRM_NETWORK_LEVELS: readonly string[] = ['network', 'dangerous'];

  constructor(private config: PolicyConfig) {}

  /**
   * Returns true when running `toolName` at `permission` level needs approval.
   *
   * Blocked tools, and tools missing from a configured allow-list, always need
   * approval; otherwise the decision follows the configured mode.
   */
  requiresApproval(permission: PermissionLevel, toolName: string): boolean {
    const { blockedTools, allowedTools, mode } = this.config;

    if (blockedTools?.includes(toolName)) return true;
    if (allowedTools && !allowedTools.includes(toolName)) return true;

    switch (mode) {
      case 'manual-all':
      case 'locked-down':
        return true;
      case 'auto':
        return permission === 'dangerous';
      case 'confirm-writes':
        return PolicyEngine.CONFIRM_WRITES_LEVELS.includes(permission);
      case 'confirm-network':
        return PolicyEngine.CONFIRM_NETWORK_LEVELS.includes(permission);
    }
  }

  /** Returns true when `toolName` is on the configured block list. */
  isBlocked(toolName: string): boolean {
    const blocked = this.config.blockedTools;
    return blocked ? blocked.includes(toolName) : false;
  }

  /**
   * Checks usage against the configured limits; the token limit is checked first.
   * A limit that is unset or 0 is not enforced; reaching a limit exactly counts
   * as exhausted.
   *
   * @returns `{ ok: true }`, or `{ ok: false, reason }` naming the exhausted budget.
   */
  checkBudget(currentUsage: { tokens: number; costUsd: number }): { ok: boolean; reason?: string } {
    const { maxTokens, maxCostUsd } = this.config;
    const { tokens, costUsd } = currentUsage;

    if (maxTokens && tokens >= maxTokens) {
      return { ok: false, reason: `Token budget exhausted: ${tokens}/${maxTokens}` };
    }
    if (maxCostUsd && costUsd >= maxCostUsd) {
      return { ok: false, reason: `Cost budget exhausted: $${costUsd.toFixed(4)}/$${maxCostUsd}` };
    }
    return { ok: true };
  }
}
src/core/provider/index.ts ADDED
@@ -0,0 +1,99 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // ─── Provider Adapter Interface ─────────────────────────────────────────────
2
+ // Normalizes differences between model vendors while exposing capabilities.
3
+
4
+ import { z } from 'zod';
5
+ import type { TokenUsage } from '../events/index.js';
6
+
7
+ // ─── Model Capabilities ─────────────────────────────────────────────────────
8
/** Feature flags a model or provider can advertise. */
export type ModelCapability =
  | 'streaming' | 'tool-calling' | 'structured-output'
  | 'vision' | 'long-context' | 'json-mode' | 'reasoning';
16
+
17
/** Description of one model offered by a provider. */
export interface ModelInfo {
  /** Model id. */
  id: string;
  /** Model name. */
  name: string;
  /** Provider the model belongs to. */
  provider: string;
  /** Context window size. */
  contextWindow: number;
  maxOutputTokens?: number;
  /** Capabilities this model supports. */
  capabilities: ModelCapability[];
  /** Cost per one million input units. NOTE(review): currency/unit not stated here — presumably USD per million tokens. */
  costPerMillionInput?: number;
  /** Cost per one million output units. NOTE(review): currency/unit not stated here — presumably USD per million tokens. */
  costPerMillionOutput?: number;
}
27
+
28
+ // ─── Messages ───────────────────────────────────────────────────────────────
29
/** Who authored a conversation message. */
export type MessageRole = 'system' | 'user' | 'assistant' | 'tool';

/** One message in the conversation sent to a model. */
export interface Message {
  role: MessageRole;
  /** Message text. */
  content: string;
  name?: string;
  /** For 'tool' messages: id of the tool call this message answers. */
  toolCallId?: string;
  /** For 'assistant' messages: tool calls requested by the model. */
  toolCalls?: ToolCallRequest[];
}
38
+
39
/** A tool invocation requested by the model. */
export interface ToolCallRequest {
  /** Id of this call; echoed back as `toolCallId` on the tool's reply message. */
  id: string;
  /** Name of the tool to invoke. */
  name: string;
  /** Call arguments, encoded as a JSON string. */
  arguments: string;
}
44
+
45
+ // ─── Invocation ─────────────────────────────────────────────────────────────
46
/** Input for a single model request (streaming or not). */
export interface ModelInvocation {
  /** Model id to call. */
  model: string;
  /** Conversation so far. */
  messages: Message[];
  /** Tools the model may call. */
  tools?: ToolDefinition[];
  temperature?: number;
  maxTokens?: number;
  jsonMode?: boolean;
  /** Stop sequences. */
  stop?: string[];
  /** Abort signal for the request. */
  signal?: AbortSignal;
}
56
+
57
/** Tool description presented to the model. */
export interface ToolDefinition {
  name: string;
  description: string;
  /** JSON Schema describing the tool's parameters. */
  parameters: Record<string, unknown>;
}
62
+
63
+ // ─── Result ─────────────────────────────────────────────────────────────────
64
/** Normalized outcome of a model request. */
export interface ModelResult {
  /** Assistant text. */
  content: string;
  /** Tool calls requested by the model, if any. */
  toolCalls?: ToolCallRequest[];
  /** Token usage for this request. */
  usage: TokenUsage;
  /** Why generation ended. */
  finishReason: 'stop' | 'tool_calls' | 'length' | 'error';
  /** Unprocessed provider response, if kept. */
  rawResponse?: unknown;
}
71
+
72
+ // ─── Streaming Events ───────────────────────────────────────────────────────
73
/** Events yielded by `ProviderAdapter.stream`, discriminated by `type`. */
export type ModelStreamEvent =
  /** A chunk of assistant text. */
  | { type: 'text-delta'; text: string }
  /** A tool call begins; `id` ties together its later delta/end events. */
  | { type: 'tool-call-start'; id: string; name: string }
  /** A chunk of the tool call's arguments string. */
  | { type: 'tool-call-delta'; id: string; argumentsDelta: string }
  /** The tool call with this `id` is complete. */
  | { type: 'tool-call-end'; id: string }
  /** Final event carrying the complete result. */
  | { type: 'finish'; result: ModelResult };
79
+
80
+ // ─── Provider Adapter ───────────────────────────────────────────────────────
81
/** Contract every model provider implements. */
export interface ProviderAdapter {
  /** Provider id. */
  id: string;
  /** Display label. */
  label: string;
  /** Lists the models this provider offers. */
  listModels(): Promise<ModelInfo[]>;
  /** Performs one request and resolves with the complete result. */
  invoke(input: ModelInvocation): Promise<ModelResult>;
  /** Performs one request, yielding events as they arrive. */
  stream(input: ModelInvocation): AsyncIterable<ModelStreamEvent>;
  /** Reports whether `capability` is supported, optionally for a specific model. */
  supports(capability: ModelCapability, model?: string): boolean;
}
89
+
90
+ // ─── Provider Config ────────────────────────────────────────────────────────
91
/** Schema for provider construction options; only `id` is required. */
export const ProviderConfigSchema = z.object({
  id: z.string(),
  apiKey: z.string().optional(),
  baseUrl: z.string().optional(),
  defaultModel: z.string().optional(),
  orgId: z.string().optional(),
  /** Extra headers as a string-to-string map. */
  headers: z.record(z.string()).optional(),
});

/** Provider options, as validated by {@link ProviderConfigSchema}. */
export type ProviderConfig = z.infer<typeof ProviderConfigSchema>;
src/core/runtime/index.ts ADDED
@@ -0,0 +1,279 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // ─── Runtime ────────────────────────────────────────────────────────────────
2
+ // The heart of the harness: manages session state, orchestration, tool execution,
3
+ // budgets, retries, and structured planner/executor/evaluator roles.
4
+
5
+ import { nanoid } from 'nanoid';
6
+ import { EventBus, now, type HarnessEvent, type PlanItem, type TokenUsage, type ToolCallRecord } from '../events/index.js';
7
+ import type { ProviderAdapter, ModelInvocation, Message, ToolCallRequest, ModelResult } from '../provider/index.js';
8
+ import { ToolRegistry, type ToolDef, type ToolContext } from '../tools/index.js';
9
+ import { SkillRegistry } from '../skills/index.js';
10
+ import { PolicyEngine } from '../policy/index.js';
11
+ import { MetricsCollector } from '../observability/index.js';
12
+ import { ArtifactStore } from '../artifacts/index.js';
13
+ import { Evaluator, type EvalContext } from '../evaluators/index.js';
14
+
15
+ // ─── Session State ──────────────────────────────────────────────────────────
16
/** Mutable state of one runtime session. */
export interface SessionState {
  /** Session id. */
  id: string;
  /** The goal the session was started with. */
  goal: string;
  plan: PlanItem[];
  /** Conversation so far. */
  messages: Message[];
  /** Ids of artifacts. */
  artifacts: string[];
  /** Id of the provider in use. */
  provider: string;
  /** Model id in use. */
  model: string;
  /** Active skill ids. */
  skills: string[];
  /** Running totals of tokens and cost spent on model calls. */
  budgetUsed: { tokens: number; costUsd: number };
  status: 'running' | 'completed' | 'failed' | 'paused';
  /** Retries consumed so far by failed model calls. */
  retries: number;
  /** Retry ceiling for failed model calls. */
  maxRetries: number;
  /** Creation timestamp string. */
  createdAt: string;
}
31
+
32
/** Collaborators and limits a `Runtime` is constructed with. */
export interface RuntimeConfig {
  provider: ProviderAdapter;
  /** Model id used for every model call. */
  model: string;
  tools: ToolRegistry;
  skills: SkillRegistry;
  policy: PolicyEngine;
  metrics: MetricsCollector;
  artifacts: ArtifactStore;
  evaluator: Evaluator;
  /** Bus that receives every event the runtime emits. */
  eventBus: EventBus;
  /** Base system prompt; skill instructions are appended to it. */
  systemPrompt: string;
  /** Ids of the skills whose instructions are included. */
  activeSkills: string[];
  /** Retry ceiling for failed model calls; defaults to 3. */
  maxRetries?: number;
  /** Maximum number of model turns; defaults to 20. */
  maxTurns?: number;
  /** Token limit reported in `budget.warning` events (0 when unset). */
  budgetTokens?: number;
  /** NOTE(review): not read in the visible part of the runtime — presumably a cost limit in USD. */
  budgetCostUsd?: number;
  /** NOTE(review): usage is outside the visible code — presumably asked to approve a tool call before it runs. */
  approvalHandler?: (toolCall: ToolCallRecord) => Promise<boolean>;
}
50
+
51
+ // ─── Runtime ────────────────────────────────────────────────────────────────
52
+ export class Runtime {
53
+ private state: SessionState;
54
+ private config: RuntimeConfig;
55
+ private abortController = new AbortController();
56
+ private turn = 0;
57
+
58
+ constructor(config: RuntimeConfig, goal: string) {
59
+ this.config = config;
60
+ this.state = {
61
+ id: nanoid(),
62
+ goal,
63
+ plan: [],
64
+ messages: [],
65
+ artifacts: [],
66
+ provider: config.provider.id,
67
+ model: config.model,
68
+ skills: config.activeSkills,
69
+ budgetUsed: { tokens: 0, costUsd: 0 },
70
+ status: 'running',
71
+ retries: 0,
72
+ maxRetries: config.maxRetries ?? 3,
73
+ createdAt: now(),
74
+ };
75
+ }
76
+
77
+ get sessionId(): string { return this.state.id; }
78
+ get status(): string { return this.state.status; }
79
+
80
+ private emit(event: HarnessEvent): void {
81
+ this.config.eventBus.emit(event);
82
+ }
83
+
84
+ async run(): Promise<SessionState> {
85
+ this.emit({ type: 'session.started', sessionId: this.state.id, goal: this.state.goal, timestamp: now() });
86
+
87
+ // Build system message
88
+ const skillInstructions = this.config.skills.buildInstructions(this.config.activeSkills);
89
+ const systemMsg: Message = {
90
+ role: 'system',
91
+ content: [this.config.systemPrompt, skillInstructions].filter(Boolean).join('\n\n---\n\n'),
92
+ };
93
+ this.state.messages = [systemMsg, { role: 'user', content: this.state.goal }];
94
+
95
+ const maxTurns = this.config.maxTurns ?? 20;
96
+
97
+ try {
98
+ while (this.state.status === 'running' && this.turn < maxTurns) {
99
+ this.turn++;
100
+
101
+ // Budget check
102
+ const budgetCheck = this.config.policy.checkBudget(this.state.budgetUsed);
103
+ if (!budgetCheck.ok) {
104
+ this.emit({ type: 'budget.warning', usage: { promptTokens: 0, completionTokens: 0, totalTokens: this.state.budgetUsed.tokens, estimatedCostUsd: this.state.budgetUsed.costUsd }, limit: this.config.budgetTokens ?? 0, timestamp: now() });
105
+ break;
106
+ }
107
+
108
+ // Invoke model
109
+ const toolDefs = this.config.tools.listForModel();
110
+ const invocation: ModelInvocation = {
111
+ model: this.config.model,
112
+ messages: this.state.messages,
113
+ tools: toolDefs,
114
+ signal: this.abortController.signal,
115
+ };
116
+
117
+ this.emit({ type: 'model.request.start', provider: this.config.provider.id, model: this.config.model, timestamp: now() });
118
+ const startMs = Date.now();
119
+
120
+ let result: ModelResult;
121
+ try {
122
+ result = await this.config.provider.invoke(invocation);
123
+ } catch (err) {
124
+ const errMsg = err instanceof Error ? err.message : String(err);
125
+ this.emit({ type: 'error', message: `Model call failed: ${errMsg}`, timestamp: now() });
126
+ if (this.state.retries < this.state.maxRetries) {
127
+ this.state.retries++;
128
+ continue;
129
+ }
130
+ this.state.status = 'failed';
131
+ break;
132
+ }
133
+
134
+ const durationMs = Date.now() - startMs;
135
+ this.state.budgetUsed.tokens += result.usage.totalTokens;
136
+ this.state.budgetUsed.costUsd += result.usage.estimatedCostUsd ?? 0;
137
+
138
+ this.emit({ type: 'model.request.end', provider: this.config.provider.id, model: this.config.model, usage: result.usage, durationMs, timestamp: now() });
139
+ this.config.metrics.record({ timestamp: now(), type: 'model-call', provider: this.config.provider.id, model: this.config.model, durationMs, usage: result.usage, success: true });
140
+
141
+ // Handle assistant response
142
+ if (result.content) {
143
+ this.emit({ type: 'model.stream.end', fullText: result.content, timestamp: now() });
144
+ }
145
+
146
+ this.state.messages.push({
147
+ role: 'assistant',
148
+ content: result.content,
149
+ toolCalls: result.toolCalls,
150
+ });
151
+
152
+ // If no tool calls, we're done
153
+ if (!result.toolCalls || result.toolCalls.length === 0) {
154
+ this.state.status = 'completed';
155
+ break;
156
+ }
157
+
158
+ // Execute tool calls
159
+ for (const tc of result.toolCalls) {
160
+ const toolResult = await this.executeTool(tc);
161
+ this.state.messages.push({
162
+ role: 'tool',
163
+ content: typeof toolResult === 'string' ? toolResult : JSON.stringify(toolResult),
164
+ toolCallId: tc.id,
165
+ });
166
+ }
167
+ }
168
+
169
+ // Run evaluation
170
+ if (this.state.status === 'completed') {
171
+ this.emit({ type: 'evaluation.started', timestamp: now() });
172
+ const evalCtx: EvalContext = {
173
+ sessionId: this.state.id,
174
+ goal: this.state.goal,
175
+ artifacts: this.config.artifacts.list().map((a) => ({ path: a.path, content: a.content, type: a.type })),
176
+ assistantOutput: this.state.messages.filter((m) => m.role === 'assistant').map((m) => m.content).join('\n'),
177
+ workDir: process.cwd(),
178
+ };
179
+ const report = await this.config.evaluator.evaluate(evalCtx);
180
+ this.emit({ type: 'evaluation.completed', report, timestamp: now() });
181
+ }
182
+
183
+ this.emit({ type: 'session.completed', sessionId: this.state.id, summary: `Completed in ${this.turn} turns.`, timestamp: now() });
184
+ } catch (err) {
185
+ const errMsg = err instanceof Error ? err.message : String(err);
186
+ this.state.status = 'failed';
187
+ this.emit({ type: 'session.failed', sessionId: this.state.id, error: errMsg, timestamp: now() });
188
+ }
189
+
190
+ return this.state;
191
+ }
192
+
193
/**
 * Runs one tool call requested by the model and returns the value that is fed
 * back to the model as the tool message.
 *
 * Steps: look the tool up by name, JSON-parse and schema-validate the
 * arguments, apply the block/approval policy, then execute with a per-attempt
 * timeout and up to `tool.retries` additional attempts.
 *
 * Failures never throw: every error path emits an event and resolves to
 * `{ error: string }`.
 *
 * @param tc - Tool call as produced by the provider (`arguments` is a JSON string).
 * @returns The tool's result, or `{ error }` when the call could not be completed.
 */
private async executeTool(tc: ToolCallRequest): Promise<unknown> {
  const tool = this.config.tools.getByName(tc.name);
  if (!tool) {
    const errMsg = `Unknown tool: ${tc.name}`;
    this.emit({ type: 'tool.failed', toolCallId: tc.id, error: errMsg, durationMs: 0, timestamp: now() });
    return { error: errMsg };
  }

  // Parse input
  let input: unknown;
  try {
    const raw = JSON.parse(tc.arguments);
    input = tool.inputSchema.parse(raw);
  } catch (err) {
    const errMsg = `Invalid tool input: ${err instanceof Error ? err.message : String(err)}`;
    this.emit({ type: 'tool.failed', toolCallId: tc.id, error: errMsg, durationMs: 0, timestamp: now() });
    return { error: errMsg };
  }

  // Policy check
  if (this.config.policy.isBlocked(tool.name)) {
    this.emit({ type: 'tool.denied', toolCallId: tc.id, reason: 'Tool is blocked by policy.', timestamp: now() });
    return { error: 'Tool blocked by policy.' };
  }

  const needsApproval = this.config.policy.requiresApproval(tool.permission, tool.name);
  const record: ToolCallRecord = {
    id: tc.id,
    toolId: tool.id,
    toolName: tool.name,
    input,
    status: needsApproval ? 'awaiting-approval' : 'queued',
  };
  this.emit({ type: 'tool.requested', toolCall: record, timestamp: now() });

  // NOTE(review): when approval is required but no approvalHandler is configured
  // the call proceeds without approval (fail-open). Confirm this is intended.
  if (needsApproval && this.config.approvalHandler) {
    const approved = await this.config.approvalHandler(record);
    if (!approved) {
      this.emit({ type: 'tool.denied', toolCallId: tc.id, reason: 'User denied.', timestamp: now() });
      return { error: 'Tool call denied by user.' };
    }
    this.emit({ type: 'tool.approved', toolCallId: tc.id, timestamp: now() });
  }

  // Execute
  this.emit({ type: 'tool.started', toolCallId: tc.id, timestamp: now() });
  const startMs = Date.now(); // durations below span all attempts, not just the last one

  const ctx: ToolContext = {
    sessionId: this.state.id,
    workDir: process.cwd(),
    signal: this.abortController.signal,
    emit: (msg) => this.emit({ type: 'tool.progress', toolCallId: tc.id, message: msg, timestamp: now() }),
  };

  let retries = 0;
  while (retries <= tool.retries) {
    // Each attempt gets its own timer; it is cleared in `finally` so a finished
    // attempt does not leave a pending timeout keeping the event loop alive.
    let timer: ReturnType<typeof setTimeout> | undefined;
    try {
      const timeout = new Promise<never>((_, reject) => {
        timer = setTimeout(() => reject(new Error('Tool timeout')), tool.timeout);
      });
      const result = await Promise.race([tool.execute(input, ctx), timeout]);
      const durationMs = Date.now() - startMs;
      this.emit({ type: 'tool.finished', toolCallId: tc.id, result, durationMs, timestamp: now() });
      this.config.metrics.record({ timestamp: now(), type: 'tool-call', toolName: tool.name, durationMs, success: true });
      return result;
    } catch (err) {
      retries++;
      if (retries > tool.retries) {
        const durationMs = Date.now() - startMs;
        const errMsg = err instanceof Error ? err.message : String(err);
        this.emit({ type: 'tool.failed', toolCallId: tc.id, error: errMsg, durationMs, timestamp: now() });
        this.config.metrics.record({ timestamp: now(), type: 'tool-call', toolName: tool.name, durationMs, success: false, error: errMsg });
        return { error: errMsg };
      }
      this.config.metrics.record({ timestamp: now(), type: 'retry', toolName: tool.name, durationMs: 0, success: false });
    } finally {
      clearTimeout(timer);
    }
  }

  return { error: 'Unexpected tool execution path.' };
}
274
+
275
/** Flags the session as paused; the run loop only iterates while the status is 'running'. */
pause(): void {
  this.state.status = 'paused';
}
276
/** Switches a paused session back to 'running'; any other status is left untouched. */
resume(): void {
  if (this.state.status !== 'paused') return;
  this.state.status = 'running';
}
277
/**
 * Aborts the session's AbortController (its signal is handed to model
 * invocations and tool contexts) and marks the session as failed.
 */
cancel(): void {
  this.abortController.abort();
  this.state.status = 'failed';
}
278
/** Returns the live session state object (typed read-only, not a copy). */
getState(): Readonly<SessionState> {
  return this.state;
}
279
+ }
src/core/skills/index.ts ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // ─── Skills System ──────────────────────────────────────────────────────────
2
+ // Modular instruction packs attachable per task or session.
3
+
4
/** A named pack of instructions that can be attached to a task or session. */
export interface SkillModule {
  /** Registry key for the skill. */
  id: string;
  /** Heading used when the skill is rendered into instructions. */
  title: string;
  description: string;
  /** Instruction text emitted under the skill's heading. */
  instructions: string;
  suggestedTools?: string[];
  tags?: string[];
}
12
+
13
/** In-memory collection of skills keyed by id. */
export class SkillRegistry {
  private skills = new Map<string, SkillModule>();

  /** Stores the skill under its id, replacing any skill already registered with that id. */
  register(skill: SkillModule): void {
    this.skills.set(skill.id, skill);
  }

  /** Looks a skill up by id; `undefined` when it is not registered. */
  get(id: string): SkillModule | undefined {
    return this.skills.get(id);
  }

  /** All registered skills, in registration order, as a fresh array. */
  list(): SkillModule[] {
    return Array.from(this.skills.values());
  }

  /** Maps ids to skills, preserving the order of `ids` and dropping unknown ids. */
  resolve(ids: string[]): SkillModule[] {
    const found: SkillModule[] = [];
    for (const id of ids) {
      const skill = this.get(id);
      if (skill) found.push(skill);
    }
    return found;
  }

  /**
   * Renders the resolved skills as markdown sections separated by horizontal
   * rules. Returns an empty string when none of the ids resolve.
   */
  buildInstructions(ids: string[]): string {
    const sections = this.resolve(ids).map(
      ({ title, instructions }) => `## Skill: ${title}\n\n${instructions}`,
    );
    if (sections.length === 0) return '';
    return sections.join('\n\n---\n\n');
  }
}
src/core/tools/index.ts ADDED
@@ -0,0 +1,113 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // ─── Tool Registry ──────────────────────────────────────────────────────────
2
+ // First-class subsystem for typed, permissioned, observable tool execution.
3
+
4
+ import { z, ZodSchema } from 'zod';
5
+
6
+ // ─── Permission Levels ──────────────────────────────────────────────────────
7
/** Permission tier a tool declares via `ToolDef.permission`. */
export type PermissionLevel =
  | 'read'
  | 'write'
  | 'exec'
  | 'network'
  | 'dangerous';
8
+
9
+ // ─── Tool Status ────────────────────────────────────────────────────────────
10
/** Lifecycle states a tool call record can be in. */
export type ToolStatus =
  | 'queued' | 'awaiting-approval'
  | 'running' | 'streaming'
  | 'success' | 'failed' | 'denied' | 'cancelled';
19
+
20
+ // ─── Side Effect Classification ─────────────────────────────────────────────
21
/** Kind of side effect a tool declares via `ToolDef.sideEffect`. */
export type SideEffect =
  | 'none'
  | 'filesystem'
  | 'network'
  | 'process'
  | 'mixed';
22
+
23
+ // ─── Tool Definition ────────────────────────────────────────────────────────
24
/**
 * Definition of a tool: identity, schemas, policy metadata and the function
 * that performs the work.
 */
export interface ToolDef<TInput = unknown, TOutput = unknown> {
  /** Registry key; registering a second tool with the same id throws. */
  id: string;
  /** Name exposed to the model and used to look the tool up by name. */
  name: string;
  description: string;
  /** Schema for the tool's input; also converted to JSON Schema for the model. */
  inputSchema: ZodSchema<TInput>;
  outputSchema: ZodSchema<TOutput>;
  permission: PermissionLevel;
  sideEffect: SideEffect;
  /** Timeout in milliseconds. */
  timeout: number;
  retries: number;
  tags?: string[];
  /** Optional display hints. */
  renderer?: {
    icon?: string;
    color?: string;
    compact?: boolean;
  };
  /** Performs the tool's work for the given input. */
  execute(input: TInput, ctx: ToolContext): Promise<TOutput>;
}
42
+
43
+ // ─── Tool Context ───────────────────────────────────────────────────────────
44
/** Per-call context passed as the second argument to `ToolDef.execute`. */
export interface ToolContext {
  sessionId: string;
  workDir: string;
  /** Optional abort signal. */
  signal?: AbortSignal;
  /** Reports a progress message for the running call. */
  emit(message: string): void;
}
50
+
51
+ // ─── Tool Registry ──────────────────────────────────────────────────────────
52
/**
 * In-memory registry of tool definitions keyed by tool id, plus a minimal
 * Zod-to-JSON-Schema conversion used to advertise tools to a model.
 */
export class ToolRegistry {
  /** Zod wrapper types that are looked through when determining a field's type. */
  private static readonly WRAPPER_TYPES = new Set(['ZodOptional', 'ZodDefault', 'ZodNullable']);

  private tools = new Map<string, ToolDef<any, any>>();

  /**
   * Adds a tool.
   * @throws Error when a tool with the same id is already registered.
   */
  register<TI, TO>(tool: ToolDef<TI, TO>): void {
    if (this.tools.has(tool.id)) {
      throw new Error(`Tool already registered: ${tool.id}`);
    }
    this.tools.set(tool.id, tool);
  }

  /** Looks a tool up by id. */
  get(id: string): ToolDef | undefined {
    return this.tools.get(id);
  }

  /** Returns the first registered tool whose `name` matches, if any. */
  getByName(name: string): ToolDef | undefined {
    for (const tool of this.tools.values()) {
      if (tool.name === name) return tool;
    }
    return undefined;
  }

  /** All registered tools in registration order, as a fresh array. */
  list(): ToolDef[] {
    return [...this.tools.values()];
  }

  /** Tool descriptors (name, description, JSON-Schema parameters) for a model request. */
  listForModel(): Array<{ name: string; description: string; parameters: Record<string, unknown> }> {
    return this.list().map((t) => ({
      name: t.name,
      description: t.description,
      parameters: this.zodToJsonSchema(t.inputSchema),
    }));
  }

  /**
   * Minimal Zod-to-JSON-Schema converter for tool definitions. It reads Zod's
   * internal `_def` objects, so it is tied to the Zod version in use.
   * In production, use the zod-to-json-schema package.
   *
   * Non-object root schemas yield an empty object schema.
   */
  private zodToJsonSchema(schema: ZodSchema): Record<string, unknown> {
    const def = (schema as any)?._def;
    if (def?.typeName === 'ZodObject') return this.objectDefToJson(def);
    return { type: 'object', properties: {} };
  }

  /** Converts a ZodObject `_def` into `{ type: 'object', properties, required }`. */
  private objectDefToJson(def: any): Record<string, unknown> {
    const shape: Record<string, unknown> = (typeof def.shape === 'function' ? def.shape() : def.shape) ?? {};
    const properties: Record<string, unknown> = {};
    const required: string[] = [];
    for (const [key, value] of Object.entries(shape)) {
      const fieldDef = (value as any)?._def;
      properties[key] = this.fieldDefToJson(fieldDef);
      if (!this.isOptionalDef(fieldDef)) required.push(key);
    }
    return { type: 'object', properties, required };
  }

  /**
   * Converts one field `_def` into a JSON-Schema fragment. Always carries
   * `type` and `description`; arrays additionally get `items`, enums get
   * `enum`, and nested objects are converted recursively.
   */
  private fieldDefToJson(def: any): Record<string, unknown> {
    const inner = this.unwrapDef(def);
    // A description may sit on the wrapper or on the wrapped schema.
    const description: string = def?.description || inner?.description || '';

    if (inner?.typeName === 'ZodObject') {
      return { ...this.objectDefToJson(inner), description };
    }

    const json: Record<string, unknown> = { type: this.zodTypeToJson(inner), description };
    if (inner?.typeName === 'ZodArray') {
      // Array schemas without `items` are rejected by some model APIs.
      const elementDef = inner.type?._def;
      json.items = elementDef ? this.fieldDefToJson(elementDef) : {};
    } else if (inner?.typeName === 'ZodEnum' && Array.isArray(inner.values)) {
      json.enum = [...inner.values];
    }
    return json;
  }

  /** Strips optional/default/nullable wrappers and returns the innermost `_def`. */
  private unwrapDef(def: any): any {
    let current = def;
    while (current && ToolRegistry.WRAPPER_TYPES.has(current.typeName) && current.innerType?._def) {
      current = current.innerType._def;
    }
    return current;
  }

  /** True when the caller may omit the field (optional, or has a default). */
  private isOptionalDef(def: any): boolean {
    let current = def;
    while (current) {
      if (current.typeName === 'ZodOptional' || current.typeName === 'ZodDefault') return true;
      if (current.typeName !== 'ZodNullable') return false;
      current = current.innerType?._def;
    }
    return false;
  }

  /** Maps a Zod `_def` to a JSON-Schema type name; unknown kinds fall back to 'string'. */
  private zodTypeToJson(def: any): string {
    switch (this.unwrapDef(def)?.typeName) {
      case 'ZodString': return 'string';
      case 'ZodNumber': return 'number';
      case 'ZodBoolean': return 'boolean';
      case 'ZodArray': return 'array';
      case 'ZodObject': return 'object';
      default: return 'string';
    }
  }
}
src/providers/anthropic/index.ts ADDED
@@ -0,0 +1,159 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // ─── Anthropic Provider Adapter ──────────────────────────────────────────────
2
+ import type {
3
+ ProviderAdapter, ProviderConfig, ModelInfo, ModelCapability,
4
+ ModelInvocation, ModelResult, ModelStreamEvent, ToolCallRequest,
5
+ } from '../../core/provider/index.js';
6
+ import type { TokenUsage } from '../../core/events/index.js';
7
+
8
/**
 * Provider adapter for the Anthropic Messages API (`POST {baseUrl}/v1/messages`),
 * supporting both one-shot and server-sent-event streaming requests.
 */
export class AnthropicProvider implements ProviderAdapter {
  id = 'anthropic';
  label = 'Anthropic';
  private apiKey: string;
  private baseUrl: string;

  /** API key falls back to `ANTHROPIC_API_KEY`; base URL to the public endpoint. */
  constructor(config: ProviderConfig) {
    this.apiKey = config.apiKey ?? process.env['ANTHROPIC_API_KEY'] ?? '';
    this.baseUrl = config.baseUrl ?? 'https://api.anthropic.com';
  }

  /** Returns a static, hard-coded model catalogue (no network call). */
  async listModels(): Promise<ModelInfo[]> {
    return [
      { id: 'claude-sonnet-4-20250514', name: 'Claude Sonnet 4', provider: 'anthropic', contextWindow: 200000, maxOutputTokens: 64000, capabilities: ['streaming', 'tool-calling', 'vision', 'long-context', 'reasoning'], costPerMillionInput: 3, costPerMillionOutput: 15 },
      { id: 'claude-opus-4-20250514', name: 'Claude Opus 4', provider: 'anthropic', contextWindow: 200000, maxOutputTokens: 32000, capabilities: ['streaming', 'tool-calling', 'vision', 'long-context', 'reasoning'], costPerMillionInput: 15, costPerMillionOutput: 75 },
      { id: 'claude-3-5-haiku-20241022', name: 'Claude 3.5 Haiku', provider: 'anthropic', contextWindow: 200000, maxOutputTokens: 8192, capabilities: ['streaming', 'tool-calling', 'vision'], costPerMillionInput: 0.8, costPerMillionOutput: 4 },
    ];
  }

  supports(capability: ModelCapability): boolean {
    return ['streaming', 'tool-calling', 'vision', 'long-context'].includes(capability);
  }

  /**
   * Sends a non-streaming request and returns the parsed result.
   * @throws Error on a non-2xx response (message includes status and body).
   */
  async invoke(input: ModelInvocation): Promise<ModelResult> {
    const res = await this.post(this.buildBody(input), input.signal);
    const data = await res.json() as any;
    return this.parseResponse(data);
  }

  /**
   * Sends a streaming request and yields text/tool-call deltas followed by a
   * `finish` event (emitted when the API's `message_delta` event arrives).
   * @throws Error on a non-2xx response or a response without a body.
   */
  async *stream(input: ModelInvocation): AsyncIterable<ModelStreamEvent> {
    const res = await this.post({ ...this.buildBody(input), stream: true }, input.signal);
    if (!res.body) throw new Error('Anthropic API error: response has no body');

    const reader = res.body.getReader();
    const decoder = new TextDecoder();
    let buffer = '';
    let fullText = '';
    // Prompt tokens are reported on `message_start`; `message_delta` only
    // carries output tokens, so the value has to be remembered.
    let inputTokens = 0;
    const toolCalls: Array<{ id: string; name: string; args: string }> = [];
    let currentTool: { id: string; name: string; args: string } | undefined;

    try {
      while (true) {
        const { done, value } = await reader.read();
        if (done) break;
        buffer += decoder.decode(value, { stream: true });
        const lines = buffer.split('\n');
        buffer = lines.pop() ?? ''; // keep the trailing partial line for the next chunk

        for (const line of lines) {
          if (!line.startsWith('data: ')) continue;
          const event = JSON.parse(line.slice(6)) as any;

          if (event.type === 'message_start') {
            inputTokens = event.message?.usage?.input_tokens ?? inputTokens;
          } else if (event.type === 'content_block_start' && event.content_block?.type === 'tool_use') {
            currentTool = { id: event.content_block.id, name: event.content_block.name, args: '' };
            toolCalls.push(currentTool);
            yield { type: 'tool-call-start', id: currentTool.id, name: currentTool.name };
          } else if (event.type === 'content_block_delta') {
            if (event.delta?.type === 'text_delta') {
              fullText += event.delta.text;
              yield { type: 'text-delta', text: event.delta.text };
            } else if (event.delta?.type === 'input_json_delta' && currentTool) {
              currentTool.args += event.delta.partial_json;
              yield { type: 'tool-call-delta', id: currentTool.id, argumentsDelta: event.delta.partial_json };
            }
          } else if (event.type === 'content_block_stop' && currentTool) {
            yield { type: 'tool-call-end', id: currentTool.id };
            currentTool = undefined;
          } else if (event.type === 'message_delta') {
            const promptTokens: number = event.usage?.input_tokens ?? inputTokens;
            const completionTokens: number = event.usage?.output_tokens ?? 0;
            const usage: TokenUsage = {
              promptTokens,
              completionTokens,
              totalTokens: promptTokens + completionTokens,
            };
            const tcReqs: ToolCallRequest[] = toolCalls.map((t) => ({ id: t.id, name: t.name, arguments: t.args }));
            yield {
              type: 'finish',
              result: {
                content: fullText,
                toolCalls: tcReqs.length ? tcReqs : undefined,
                usage,
                finishReason: tcReqs.length ? 'tool_calls' : event.delta?.stop_reason === 'max_tokens' ? 'length' : 'stop',
              },
            };
          }
        }
      }
    } finally {
      // Close the connection even when the consumer stops iterating early.
      await reader.cancel().catch(() => undefined);
    }
  }

  /** POSTs a JSON body to the messages endpoint and rejects non-2xx responses. */
  private async post(body: Record<string, unknown>, signal: ModelInvocation['signal']): Promise<Response> {
    const res = await fetch(`${this.baseUrl}/v1/messages`, {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
        'x-api-key': this.apiKey,
        'anthropic-version': '2023-06-01',
      },
      body: JSON.stringify(body),
      signal,
    });
    if (!res.ok) throw new Error(`Anthropic API error: ${res.status} ${await res.text()}`);
    return res;
  }

  /**
   * Translates the provider-neutral invocation into a Messages API body.
   * The first system message becomes `system`; tool results become `user`
   * messages with a `tool_result` block; assistant tool calls become
   * `tool_use` blocks. `max_tokens` defaults to 8192.
   */
  private buildBody(input: ModelInvocation): Record<string, unknown> {
    const systemMsg = input.messages.find((m) => m.role === 'system');
    const nonSystemMsgs = input.messages.filter((m) => m.role !== 'system');

    const messages = nonSystemMsgs.map((m) => {
      if (m.role === 'tool') {
        return { role: 'user', content: [{ type: 'tool_result', tool_use_id: m.toolCallId, content: m.content }] };
      }
      if (m.role === 'assistant' && m.toolCalls?.length) {
        const content: any[] = [];
        if (m.content) content.push({ type: 'text', text: m.content });
        for (const tc of m.toolCalls) {
          content.push({ type: 'tool_use', id: tc.id, name: tc.name, input: this.parseToolInput(tc.arguments) });
        }
        return { role: 'assistant', content };
      }
      return { role: m.role === 'user' ? 'user' : 'assistant', content: m.content };
    });

    const body: Record<string, unknown> = { model: input.model, messages, max_tokens: input.maxTokens ?? 8192 };
    if (systemMsg) body.system = systemMsg.content;
    if (input.tools?.length) {
      body.tools = input.tools.map((t) => ({ name: t.name, description: t.description, input_schema: t.parameters }));
    }
    if (input.temperature !== undefined) body.temperature = input.temperature;
    return body;
  }

  /**
   * Parses a tool call's JSON argument string for replay in the history.
   * Empty, malformed or non-object arguments (e.g. a streamed call with no
   * input) become `{}` instead of throwing and failing the whole request.
   */
  private parseToolInput(raw: string): Record<string, unknown> {
    if (!raw || !raw.trim()) return {};
    try {
      const parsed: unknown = JSON.parse(raw);
      const isPlainObject = parsed !== null && typeof parsed === 'object' && !Array.isArray(parsed);
      return isPlainObject ? (parsed as Record<string, unknown>) : {};
    } catch {
      return {};
    }
  }

  /** Flattens a Messages API response into text, tool calls, usage and finish reason. */
  private parseResponse(data: any): ModelResult {
    let content = '';
    const toolCalls: ToolCallRequest[] = [];
    for (const block of data.content ?? []) {
      if (block.type === 'text') content += block.text;
      if (block.type === 'tool_use') toolCalls.push({ id: block.id, name: block.name, arguments: JSON.stringify(block.input) });
    }
    const usage: TokenUsage = {
      promptTokens: data.usage?.input_tokens ?? 0,
      completionTokens: data.usage?.output_tokens ?? 0,
      totalTokens: (data.usage?.input_tokens ?? 0) + (data.usage?.output_tokens ?? 0),
    };
    return {
      content,
      toolCalls: toolCalls.length ? toolCalls : undefined,
      usage,
      finishReason: toolCalls.length ? 'tool_calls' : data.stop_reason === 'max_tokens' ? 'length' : 'stop',
    };
  }
}
src/providers/gemini/index.ts ADDED
@@ -0,0 +1,130 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // ─── Gemini Provider Adapter ────────────────────────────────────────────────
2
+ import type {
3
+ ProviderAdapter, ProviderConfig, ModelInfo, ModelCapability,
4
+ ModelInvocation, ModelResult, ModelStreamEvent, ToolCallRequest,
5
+ } from '../../core/provider/index.js';
6
+ import type { TokenUsage } from '../../core/events/index.js';
7
+
8
/**
 * Provider adapter for the Google Gemini `generateContent` /
 * `streamGenerateContent` REST endpoints.
 */
export class GeminiProvider implements ProviderAdapter {
  id = 'gemini';
  label = 'Google Gemini';
  private apiKey: string;
  private baseUrl: string;
  /** Monotonic counter that keeps synthesized tool-call ids unique. */
  private toolCallSeq = 0;

  /** API key falls back to `GEMINI_API_KEY`; base URL to the public v1beta endpoint. */
  constructor(config: ProviderConfig) {
    this.apiKey = config.apiKey ?? process.env['GEMINI_API_KEY'] ?? '';
    this.baseUrl = config.baseUrl ?? 'https://generativelanguage.googleapis.com/v1beta';
  }

  /** Returns a static, hard-coded model catalogue (no network call). */
  async listModels(): Promise<ModelInfo[]> {
    return [
      { id: 'gemini-2.5-pro', name: 'Gemini 2.5 Pro', provider: 'gemini', contextWindow: 1000000, maxOutputTokens: 65536, capabilities: ['streaming', 'tool-calling', 'vision', 'long-context', 'reasoning'], costPerMillionInput: 1.25, costPerMillionOutput: 10 },
      { id: 'gemini-2.5-flash', name: 'Gemini 2.5 Flash', provider: 'gemini', contextWindow: 1000000, maxOutputTokens: 65536, capabilities: ['streaming', 'tool-calling', 'vision', 'long-context'], costPerMillionInput: 0.15, costPerMillionOutput: 0.6 },
    ];
  }

  supports(capability: ModelCapability): boolean {
    return ['streaming', 'tool-calling', 'vision', 'long-context'].includes(capability);
  }

  /**
   * Sends a non-streaming request and returns the parsed result.
   * @throws Error on a non-2xx response (message includes status and body).
   */
  async invoke(input: ModelInvocation): Promise<ModelResult> {
    const res = await this.post(`${this.baseUrl}/models/${input.model}:generateContent`, input);
    const data = await res.json() as any;
    return this.parseResponse(data);
  }

  /**
   * Sends a streaming (SSE) request and yields text deltas and tool-call
   * events, then one `finish` event once the stream ends. The finish result
   * carries the collected tool calls and the last usage metadata seen.
   * @throws Error on a non-2xx response or a response without a body.
   */
  async *stream(input: ModelInvocation): AsyncIterable<ModelStreamEvent> {
    const res = await this.post(`${this.baseUrl}/models/${input.model}:streamGenerateContent?alt=sse`, input);
    if (!res.body) throw new Error('Gemini API error: response has no body');

    const reader = res.body.getReader();
    const decoder = new TextDecoder();
    let buffer = '';
    let fullText = '';
    const toolCalls: ToolCallRequest[] = [];
    let usage: TokenUsage = { promptTokens: 0, completionTokens: 0, totalTokens: 0 };

    try {
      while (true) {
        const { done, value } = await reader.read();
        if (done) break;
        buffer += decoder.decode(value, { stream: true });
        const lines = buffer.split('\n');
        buffer = lines.pop() ?? ''; // keep the trailing partial line for the next chunk

        for (const line of lines) {
          if (!line.startsWith('data: ')) continue;
          const chunk = JSON.parse(line.slice(6)) as any;
          if (chunk.usageMetadata) usage = this.toUsage(chunk.usageMetadata);
          for (const part of chunk.candidates?.[0]?.content?.parts ?? []) {
            if (part.text) {
              fullText += part.text;
              yield { type: 'text-delta', text: part.text };
            }
            if (part.functionCall) {
              // Function calls arrive whole, so start/delta/end are emitted back to back.
              const id = this.nextToolCallId();
              const args = JSON.stringify(part.functionCall.args ?? {});
              toolCalls.push({ id, name: part.functionCall.name, arguments: args });
              yield { type: 'tool-call-start', id, name: part.functionCall.name };
              yield { type: 'tool-call-delta', id, argumentsDelta: args };
              yield { type: 'tool-call-end', id };
            }
          }
        }
      }
    } finally {
      // Close the connection even when the consumer stops iterating early.
      await reader.cancel().catch(() => undefined);
    }

    yield {
      type: 'finish',
      result: {
        content: fullText,
        toolCalls: toolCalls.length ? toolCalls : undefined,
        usage,
        finishReason: toolCalls.length ? 'tool_calls' : 'stop',
      },
    };
  }

  /**
   * POSTs the request body to `url`. The API key travels in the
   * `x-goog-api-key` header rather than the query string so it does not end
   * up in logged URLs.
   */
  private async post(url: string, input: ModelInvocation): Promise<Response> {
    const res = await fetch(url, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json', 'x-goog-api-key': this.apiKey },
      body: JSON.stringify(this.buildBody(input)),
      signal: input.signal,
    });
    if (!res.ok) throw new Error(`Gemini API error: ${res.status} ${await res.text()}`);
    return res;
  }

  /** Synthesizes an id for a function call; the API response carries none that this adapter reads. */
  private nextToolCallId(): string {
    this.toolCallSeq += 1;
    return `gemini-tc-${Date.now()}-${this.toolCallSeq}`;
  }

  /** Maps Gemini `usageMetadata` to the provider-neutral usage shape. */
  private toUsage(meta: any): TokenUsage {
    return {
      promptTokens: meta?.promptTokenCount ?? 0,
      completionTokens: meta?.candidatesTokenCount ?? 0,
      totalTokens: meta?.totalTokenCount ?? 0,
    };
  }

  /** Parses a tool call's JSON argument string; empty/malformed/non-object input becomes `{}`. */
  private parseToolArgs(raw: string): Record<string, unknown> {
    if (!raw || !raw.trim()) return {};
    try {
      const parsed: unknown = JSON.parse(raw);
      const isPlainObject = parsed !== null && typeof parsed === 'object' && !Array.isArray(parsed);
      return isPlainObject ? (parsed as Record<string, unknown>) : {};
    } catch {
      return {};
    }
  }

  /**
   * Translates the provider-neutral invocation into a Gemini request body.
   * The last system message becomes `systemInstruction`. Assistant tool calls
   * are replayed as `functionCall` parts, and each tool result is sent as a
   * `functionResponse` named after the call it answers (resolved through
   * `toolCallId` when the message has no explicit `name`).
   */
  private buildBody(input: ModelInvocation): Record<string, unknown> {
    const contents: any[] = [];
    let systemInstruction: string | undefined;
    const toolNamesById = new Map<string, string>();

    for (const m of input.messages) {
      if (m.role === 'system') { systemInstruction = m.content; continue; }

      if (m.role === 'tool') {
        const name = m.name ?? (m.toolCallId ? toolNamesById.get(m.toolCallId) : undefined) ?? 'tool';
        // NOTE(review): current API docs list only 'user'/'model' roles — confirm 'function' is still accepted.
        contents.push({ role: 'function', parts: [{ functionResponse: { name, response: { result: m.content } } }] });
        continue;
      }

      if (m.role === 'assistant') {
        const parts: any[] = [];
        // Skip the empty text part when the turn consists only of function calls.
        if (m.content || !m.toolCalls?.length) parts.push({ text: m.content });
        for (const tc of m.toolCalls ?? []) {
          toolNamesById.set(tc.id, tc.name);
          parts.push({ functionCall: { name: tc.name, args: this.parseToolArgs(tc.arguments) } });
        }
        contents.push({ role: 'model', parts });
        continue;
      }

      contents.push({ role: 'user', parts: [{ text: m.content }] });
    }

    const body: Record<string, unknown> = { contents };
    if (systemInstruction) body.systemInstruction = { parts: [{ text: systemInstruction }] };
    if (input.tools?.length) {
      body.tools = [{ functionDeclarations: input.tools.map((t) => ({ name: t.name, description: t.description, parameters: t.parameters })) }];
    }
    // Honour maxTokens even when no temperature is supplied.
    if (input.temperature !== undefined || input.maxTokens !== undefined) {
      const generationConfig: Record<string, unknown> = { maxOutputTokens: input.maxTokens ?? 8192 };
      if (input.temperature !== undefined) generationConfig.temperature = input.temperature;
      body.generationConfig = generationConfig;
    }
    return body;
  }

  /** Flattens the first candidate of a response into text, tool calls and usage. */
  private parseResponse(data: any): ModelResult {
    let content = '';
    const toolCalls: ToolCallRequest[] = [];
    for (const part of data.candidates?.[0]?.content?.parts ?? []) {
      if (part.text) content += part.text;
      if (part.functionCall) {
        toolCalls.push({ id: this.nextToolCallId(), name: part.functionCall.name, arguments: JSON.stringify(part.functionCall.args ?? {}) });
      }
    }
    const usage = this.toUsage(data.usageMetadata ?? {});
    return { content, toolCalls: toolCalls.length ? toolCalls : undefined, usage, finishReason: toolCalls.length ? 'tool_calls' : 'stop' };
  }
}
src/providers/index.ts ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ // ─── Providers barrel export ─────────────────────────────────────────────────
2
+ export { OpenAIProvider } from './openai/index.js';
3
+ export { AnthropicProvider } from './anthropic/index.js';
4
+ export { GeminiProvider } from './gemini/index.js';
5
+ export { OpenAICompatibleProvider, OpenRouterProvider } from './openrouter/index.js';
src/providers/openai/index.ts ADDED
@@ -0,0 +1,148 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // ─── OpenAI Provider Adapter ────────────────────────────────────────────────
2
+ import type {
3
+ ProviderAdapter, ProviderConfig, ModelInfo, ModelCapability,
4
+ ModelInvocation, ModelResult, ModelStreamEvent, ToolCallRequest,
5
+ } from '../../core/provider/index.js';
6
+ import type { TokenUsage } from '../../core/events/index.js';
7
+
8
/**
 * Provider adapter for the OpenAI Chat Completions API
 * (`POST {baseUrl}/chat/completions`), one-shot and SSE streaming.
 */
export class OpenAIProvider implements ProviderAdapter {
  id = 'openai';
  label = 'OpenAI';
  // Protected so OpenAI-compatible subclasses can reuse the credentials.
  protected readonly apiKey: string;
  protected readonly baseUrl: string;

  /** API key falls back to `OPENAI_API_KEY`; base URL to the public endpoint. */
  constructor(config: ProviderConfig) {
    this.apiKey = config.apiKey ?? process.env['OPENAI_API_KEY'] ?? '';
    this.baseUrl = config.baseUrl ?? 'https://api.openai.com/v1';
  }

  /** Returns a static, hard-coded model catalogue (no network call). */
  async listModels(): Promise<ModelInfo[]> {
    return [
      { id: 'gpt-4o', name: 'GPT-4o', provider: 'openai', contextWindow: 128000, maxOutputTokens: 16384, capabilities: ['streaming', 'tool-calling', 'vision', 'json-mode', 'structured-output'], costPerMillionInput: 2.5, costPerMillionOutput: 10 },
      { id: 'gpt-4o-mini', name: 'GPT-4o Mini', provider: 'openai', contextWindow: 128000, maxOutputTokens: 16384, capabilities: ['streaming', 'tool-calling', 'json-mode', 'structured-output'], costPerMillionInput: 0.15, costPerMillionOutput: 0.6 },
      { id: 'o1', name: 'o1', provider: 'openai', contextWindow: 200000, maxOutputTokens: 100000, capabilities: ['streaming', 'tool-calling', 'reasoning'], costPerMillionInput: 15, costPerMillionOutput: 60 },
    ];
  }

  supports(capability: ModelCapability): boolean {
    return ['streaming', 'tool-calling', 'vision', 'json-mode', 'structured-output'].includes(capability);
  }

  /**
   * Sends a non-streaming request and returns the parsed result.
   * @throws Error on a non-2xx response or a response without choices.
   */
  async invoke(input: ModelInvocation): Promise<ModelResult> {
    const res = await this.post(this.buildBody(input, false), input.signal);
    const data = await res.json() as any;
    return this.parseResponse(data);
  }

  /**
   * Sends a streaming request and yields text/tool-call deltas. `tool-call-end`
   * events are emitted when the finish reason arrives; the `finish` event is
   * emitted after the stream ends, because with `include_usage` the usage
   * totals arrive in a separate chunk after the finish-reason chunk.
   * No `finish` event is emitted if the stream ends without a finish reason.
   * @throws Error on a non-2xx response or a response without a body.
   */
  async *stream(input: ModelInvocation): AsyncIterable<ModelStreamEvent> {
    const res = await this.post(this.buildBody(input, true), input.signal);
    if (!res.body) throw new Error('OpenAI API error: response has no body');

    const reader = res.body.getReader();
    const decoder = new TextDecoder();
    let buffer = '';
    let fullText = '';
    let finishReason: string | undefined;
    let usage: TokenUsage = { promptTokens: 0, completionTokens: 0, totalTokens: 0 };
    const toolCalls = new Map<number, { id: string; name: string; args: string }>();

    try {
      while (true) {
        const { done, value } = await reader.read();
        if (done) break;
        buffer += decoder.decode(value, { stream: true });
        const lines = buffer.split('\n');
        buffer = lines.pop() ?? ''; // keep the trailing partial line for the next chunk

        for (const line of lines) {
          if (!line.startsWith('data: ')) continue;
          const payload = line.slice(6).trim(); // trim tolerates CRLF line endings
          if (payload === '[DONE]') continue;
          const chunk = JSON.parse(payload) as any;

          // The usage chunk has an empty `choices` array, so read it before
          // looking at the delta.
          if (chunk.usage) {
            usage = {
              promptTokens: chunk.usage.prompt_tokens ?? 0,
              completionTokens: chunk.usage.completion_tokens ?? 0,
              totalTokens: chunk.usage.total_tokens ?? 0,
            };
          }

          const choice = chunk.choices?.[0];
          const delta = choice?.delta;

          if (delta?.content) {
            fullText += delta.content;
            yield { type: 'text-delta', text: delta.content };
          }

          if (delta?.tool_calls) {
            for (const tc of delta.tool_calls) {
              const idx = tc.index as number;
              if (tc.id) {
                toolCalls.set(idx, { id: tc.id, name: tc.function?.name ?? '', args: '' });
                yield { type: 'tool-call-start', id: tc.id, name: tc.function?.name ?? '' };
              }
              if (tc.function?.arguments) {
                const existing = toolCalls.get(idx);
                if (!existing) continue; // argument fragment for a call that was never started
                existing.args += tc.function.arguments;
                yield { type: 'tool-call-delta', id: existing.id, argumentsDelta: tc.function.arguments };
              }
            }
          }

          if (choice?.finish_reason && finishReason === undefined) {
            finishReason = choice.finish_reason;
            for (const tc of toolCalls.values()) {
              yield { type: 'tool-call-end', id: tc.id };
            }
          }
        }
      }
    } finally {
      // Close the connection even when the consumer stops iterating early.
      await reader.cancel().catch(() => undefined);
    }

    if (finishReason === undefined) return;
    const tcArray: ToolCallRequest[] = [...toolCalls.values()].map((t) => ({
      id: t.id, name: t.name, arguments: t.args,
    }));
    yield {
      type: 'finish',
      result: {
        content: fullText,
        toolCalls: tcArray.length ? tcArray : undefined,
        usage,
        finishReason: tcArray.length ? 'tool_calls' : finishReason === 'length' ? 'length' : 'stop',
      },
    };
  }

  /** POSTs a JSON body to the chat-completions endpoint and rejects non-2xx responses. */
  private async post(body: Record<string, unknown>, signal: ModelInvocation['signal']): Promise<Response> {
    const res = await fetch(`${this.baseUrl}/chat/completions`, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json', 'Authorization': `Bearer ${this.apiKey}` },
      body: JSON.stringify(body),
      signal,
    });
    if (!res.ok) throw new Error(`OpenAI API error: ${res.status} ${await res.text()}`);
    return res;
  }

  /**
   * Translates the provider-neutral invocation into a Chat Completions body.
   * Streaming requests also ask for usage via `stream_options.include_usage`.
   */
  private buildBody(input: ModelInvocation, stream: boolean): Record<string, unknown> {
    const messages = input.messages.map((m) => {
      if (m.role === 'tool') return { role: 'tool', content: m.content, tool_call_id: m.toolCallId };
      // Only attach tool_calls when there is at least one; an empty array is rejected by the API.
      if (m.toolCalls?.length) {
        return {
          role: 'assistant',
          content: m.content || null,
          tool_calls: m.toolCalls.map((tc) => ({ id: tc.id, type: 'function', function: { name: tc.name, arguments: tc.arguments } })),
        };
      }
      return { role: m.role, content: m.content };
    });
    const body: Record<string, unknown> = { model: input.model, messages, stream };
    if (input.tools?.length) {
      body.tools = input.tools.map((t) => ({ type: 'function', function: { name: t.name, description: t.description, parameters: t.parameters } }));
    }
    if (input.temperature !== undefined) body.temperature = input.temperature;
    if (input.maxTokens) body.max_tokens = input.maxTokens;
    if (input.jsonMode) body.response_format = { type: 'json_object' };
    if (stream) body.stream_options = { include_usage: true };
    return body;
  }

  /**
   * Flattens the first choice of a response into text, tool calls and usage.
   * @throws Error when the response carries no choice/message.
   */
  private parseResponse(data: any): ModelResult {
    const choice = data?.choices?.[0];
    if (!choice?.message) throw new Error('OpenAI API error: response contained no choices');
    const toolCalls: ToolCallRequest[] | undefined = choice.message.tool_calls?.map((tc: any) => ({
      id: tc.id, name: tc.function.name, arguments: tc.function.arguments,
    }));
    const usage: TokenUsage = {
      promptTokens: data.usage?.prompt_tokens ?? 0,
      completionTokens: data.usage?.completion_tokens ?? 0,
      totalTokens: data.usage?.total_tokens ?? 0,
    };
    return {
      content: choice.message.content ?? '',
      toolCalls,
      usage,
      finishReason: toolCalls?.length ? 'tool_calls' : choice.finish_reason === 'length' ? 'length' : 'stop',
    };
  }
}
src/providers/openrouter/index.ts ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // ─── OpenAI-Compatible Provider (OpenRouter, local, etc.) ───────────────────
2
+ import { OpenAIProvider } from '../openai/index.js';
3
+ import type { ProviderConfig, ModelInfo } from '../../core/provider/index.js';
4
+
5
/**
 * Provider for endpoints that speak the OpenAI wire format (OpenRouter, local
 * servers, ...). Reuses `OpenAIProvider` and only overrides identity and
 * model discovery.
 */
export class OpenAICompatibleProvider extends OpenAIProvider {
  override id: string;
  override label: string;

  /**
   * @param config - Provider configuration; `label` falls back to `config.id`.
   */
  constructor(config: ProviderConfig & { label?: string }) {
    super(config);
    this.id = config.id;
    this.label = config.label ?? config.id;
  }

  /**
   * Best-effort model discovery via `GET {baseUrl}/models`.
   *
   * @returns One entry per item in the response's `data` array, or an empty
   *   list when the request fails, returns a non-OK status, or cannot be parsed.
   */
  override async listModels(): Promise<ModelInfo[]> {
    try {
      // NOTE(review): `baseUrl` / `apiKey` are not declared on this subclass and
      // are read through a cast — presumably non-public fields of OpenAIProvider;
      // confirm and expose them as `protected` there if so.
      const { baseUrl, apiKey } = this as any;

      // Keyless endpoints (e.g. local servers) should not receive an empty
      // "Bearer " credential, so the header is only sent when a key is set.
      const headers: Record<string, string> = {};
      if (apiKey) headers['Authorization'] = `Bearer ${apiKey}`;

      const res = await fetch(`${baseUrl}/models`, { headers });
      if (!res.ok) return [];

      const data = await res.json() as any;
      return (data.data ?? []).map((m: any) => ({
        id: m.id,
        name: m.id,
        provider: this.id,
        // Fallback used when the endpoint does not report a context length.
        contextWindow: m.context_length ?? 128000,
        capabilities: ['streaming', 'tool-calling'] as any[],
      }));
    } catch {
      // Discovery is optional: network and parse failures yield "no models".
      return [];
    }
  }
}
36
+
37
+ // ─── OpenRouter convenience subclass ────────────────────────────────────────
38
/**
 * OpenAI-compatible provider preconfigured for OpenRouter: fixed id and label,
 * with the endpoint and API key defaulted when the caller does not supply them.
 */
export class OpenRouterProvider extends OpenAICompatibleProvider {
  /**
   * @param config - Provider configuration without `id`. `baseUrl` defaults to
   *   the OpenRouter API endpoint; `apiKey` defaults to the
   *   `OPENROUTER_API_KEY` environment variable, then to an empty string.
   */
  constructor(config: Omit<ProviderConfig, 'id'>) {
    const endpoint = config.baseUrl ?? 'https://openrouter.ai/api/v1';
    const key = config.apiKey ?? process.env['OPENROUTER_API_KEY'] ?? '';

    // Explicit keys follow the spread so they always override `config`.
    const resolved = {
      ...config,
      id: 'openrouter',
      baseUrl: endpoint,
      apiKey: key,
      label: 'OpenRouter',
    };

    super(resolved as any);
  }
}
src/skills/coding/index.ts ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // ─── Coding Skill ───────────────────────────────────────────────────────────
2
+ import type { SkillModule } from '../../core/skills/index.js';
3
+
4
/**
 * Skill module for software-engineering work.
 *
 * `instructions` is a Markdown prompt covering planning, code quality,
 * verification, file operations and communication.
 */
export const codingSkill: SkillModule = {
  id: 'coding',
  title: 'Software Engineering',
  description: 'Write, refactor, test, and debug code across languages and frameworks.',
  // Tool names (not ids), matching the `name` of the filesystem and shell tools.
  suggestedTools: ['read_file', 'write_file', 'list_directory', 'shell_exec'],
  tags: ['code', 'dev', 'engineering'],
  // Prompt text: part of runtime behaviour, so edit deliberately.
  instructions: `You are an expert software engineer. Follow these rules:

## Planning
- Break complex tasks into subtasks. Plan before coding.
- State assumptions explicitly before implementing.

## Code Quality
- Write clean, typed, well-documented code.
- Follow existing project conventions (formatting, naming, structure).
- Prefer small, focused functions over large monoliths.
- Add error handling for all I/O operations.

## Verification
- After writing code, run the test suite or relevant checks.
- If tests fail, read the error, diagnose the root cause, and fix it.
- Do not declare success without verification.

## File Operations
- Read files before modifying them to understand context.
- Make minimal targeted edits rather than rewriting entire files.
- Create new files when the change is substantial.

## Communication
- Explain your reasoning concisely.
- Show relevant code snippets in your response.
- Report test results and any remaining issues.`,
};
src/skills/docs/index.ts ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // ─── Docs Skill ─────────────────────────────────────────────────────────────
2
+ import type { SkillModule } from '../../core/skills/index.js';
3
+
4
/**
 * Skill module for technical writing.
 *
 * `instructions` is a Markdown prompt covering structure, clarity, code
 * examples, completeness and formatting.
 */
export const docsSkill: SkillModule = {
  id: 'docs',
  title: 'Documentation',
  description: 'Write clear, structured technical documentation, READMEs, guides, and API references.',
  // Tool names (not ids), matching the `name` of the filesystem tools.
  suggestedTools: ['read_file', 'write_file', 'list_directory'],
  tags: ['docs', 'writing', 'technical-writing'],
  // Prompt text: part of runtime behaviour, so edit deliberately.
  instructions: `You are an expert technical writer. Follow these rules:

## Structure
- Use clear hierarchical headings (h1 for title, h2 for sections, h3 for subsections).
- Start with a brief overview/summary before diving into details.
- Include a table of contents for documents longer than 3 sections.

## Clarity
- Write for the target audience (developers, users, or operators).
- Define terms on first use.
- Use active voice and present tense.
- Keep sentences short and paragraphs focused.

## Code Examples
- Include working code examples for every API or feature.
- Show both minimal and realistic usage patterns.
- Annotate non-obvious lines with comments.

## Completeness
- Cover: what it is, why to use it, how to install, how to use, configuration, troubleshooting.
- Include prerequisites and environment requirements.
- Document error conditions and edge cases.

## Format
- Use Markdown with consistent formatting.
- Use tables for structured comparisons.
- Use admonitions (> **Note:**, > **Warning:**) for important callouts.`,
};
src/skills/index.ts ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ // ─── Skills barrel export ────────────────────────────────────────────────────
2
+ export { codingSkill } from './coding/index.js';
3
+ export { researchSkill } from './research/index.js';
4
+ export { docsSkill } from './docs/index.js';
src/skills/research/index.ts ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // ─── Research Skill ──────────────────────────────────────────────────────────
2
+ import type { SkillModule } from '../../core/skills/index.js';
3
+
4
/**
 * Skill module for research and analysis.
 *
 * `instructions` is a Markdown prompt covering methodology, output format,
 * web research and depth.
 */
export const researchSkill: SkillModule = {
  id: 'research',
  title: 'Research & Analysis',
  description: 'Gather information from web sources, documentation, and APIs to answer questions or inform decisions.',
  // Tool names (not ids), matching the `name` of the web-fetch and file-read tools.
  suggestedTools: ['web_fetch', 'read_file'],
  tags: ['research', 'analysis', 'information'],
  // Prompt text: part of runtime behaviour, so edit deliberately.
  instructions: `You are a thorough researcher. Follow these rules:

## Methodology
- Start with the primary source (official docs, original paper, authoritative API).
- Cross-reference multiple sources for claims that matter.
- Distinguish facts from opinions and speculation.
- Note when information may be outdated.

## Output
- Structure findings with clear headings and bullet points.
- Cite sources with URLs when available.
- Highlight key findings, contradictions, and gaps.
- Provide a summary with confidence level for each major claim.

## Web Research
- Fetch documentation pages and extract relevant sections.
- Do not hallucinate URLs or content you haven't fetched.
- If a page is unavailable, note it and try alternatives.

## Depth
- For technical questions, go to the source code or spec.
- For market/product questions, find multiple data points.
- Always answer the actual question, not adjacent ones.`,
};
src/tools/fs/index.ts ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // ─── Filesystem Tool ────────────────────────────────────────────────────────
2
+ import { z } from 'zod';
3
+ import { readFile, writeFile, readdir, stat, mkdir } from 'fs/promises';
4
+ import { join, resolve } from 'path';
5
+ import type { ToolDef } from '../../core/tools/index.js';
6
+
7
/**
 * Tool that reads a file as UTF-8 text and reports its size in bytes.
 * Relative paths are resolved against `ctx.workDir`.
 */
export const readFileTool: ToolDef<{ path: string }, { content: string; size: number }> = {
  id: 'fs.read',
  name: 'read_file',
  description: 'Read the contents of a file at the given path. Returns the text content and file size in bytes.',
  inputSchema: z.object({ path: z.string().describe('Absolute or relative file path to read') }),
  outputSchema: z.object({ content: z.string(), size: z.number() }),
  permission: 'read',
  sideEffect: 'none',
  timeout: 10000,
  retries: 0,
  tags: ['filesystem'],
  renderer: { icon: '📄', color: 'blue' },
  /**
   * @param input - `{ path }` of the file to read.
   * @param ctx - Execution context; only `workDir` is used.
   * @returns The decoded text and the on-disk size reported by `stat`.
   */
  async execute(input, ctx) {
    const target = resolve(ctx.workDir, input.path);
    const text = await readFile(target, 'utf-8');
    // Size comes from the filesystem, not from the decoded string length.
    const { size } = await stat(target);
    return { content: text, size };
  },
};
26
+
27
/**
 * Tool that writes UTF-8 text to a file, creating missing parent directories
 * and overwriting any existing content. Relative paths are resolved against
 * `ctx.workDir`.
 */
export const writeFileTool: ToolDef<{ path: string; content: string }, { written: boolean; path: string }> = {
  id: 'fs.write',
  name: 'write_file',
  description: 'Write content to a file. Creates parent directories if needed. Overwrites existing content.',
  inputSchema: z.object({
    path: z.string().describe('File path to write to'),
    content: z.string().describe('Content to write'),
  }),
  outputSchema: z.object({ written: z.boolean(), path: z.string() }),
  permission: 'write',
  sideEffect: 'filesystem',
  timeout: 10000,
  retries: 0,
  tags: ['filesystem'],
  renderer: { icon: '✏️', color: 'yellow' },
  /**
   * @param input - Target `path` and the `content` to write.
   * @param ctx - Execution context; only `workDir` is used.
   * @returns `{ written: true, path }` where `path` is the resolved absolute path.
   */
  async execute(input, ctx) {
    const filePath = resolve(ctx.workDir, input.path);
    // Derive the parent with the path module rather than slicing at the last
    // '/': slicing yields '' for a file directly under the root (mkdir('')
    // fails) and is wrong for Windows '\' separators.
    const parentDir = resolve(filePath, '..');
    await mkdir(parentDir, { recursive: true });
    await writeFile(filePath, input.content, 'utf-8');
    return { written: true, path: filePath };
  },
};
50
+
51
/**
 * Tool that lists the entries of a directory with a coarse type
 * ('directory' or 'file') and a size in bytes. Relative paths are resolved
 * against `ctx.workDir`.
 */
export const listDirTool: ToolDef<{ path: string }, { entries: Array<{ name: string; type: string; size: number }> }> = {
  id: 'fs.list',
  name: 'list_directory',
  description: 'List files and directories at the given path with type and size information.',
  inputSchema: z.object({ path: z.string().describe('Directory path to list') }),
  outputSchema: z.object({ entries: z.array(z.object({ name: z.string(), type: z.string(), size: z.number() })) }),
  permission: 'read',
  sideEffect: 'none',
  timeout: 10000,
  retries: 0,
  tags: ['filesystem'],
  renderer: { icon: '📁', color: 'blue' },
  /**
   * @param input - `{ path }` of the directory to list.
   * @param ctx - Execution context; only `workDir` is used.
   * @returns One record per directory entry, in `readdir` order.
   */
  async execute(input, ctx) {
    const root = resolve(ctx.workDir, input.path);
    const dirents = await readdir(root, { withFileTypes: true });

    const pending = dirents.map(async (dirent) => {
      // An entry that cannot be stat'ed is still listed, with size 0.
      const info = await stat(join(root, dirent.name)).catch(() => ({ size: 0 }));
      const type = dirent.isDirectory() ? 'directory' : 'file';
      return { name: dirent.name, type, size: info.size };
    });

    return { entries: await Promise.all(pending) };
  },
};
src/tools/index.ts ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ // ─── Tools barrel export ─────────────────────────────────────────────────────
2
+ export { readFileTool, writeFileTool, listDirTool } from './fs/index.js';
3
+ export { shellExecTool } from './shell/index.js';
4
+ export { webFetchTool } from './web/index.js';
src/tools/shell/index.ts ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // ─── Shell Tool ─────────────────────────────────────────────────────────────
2
+ import { z } from 'zod';
3
+ import { exec } from 'child_process';
4
+ import { promisify } from 'util';
5
+ import type { ToolDef } from '../../core/tools/index.js';
6
+
7
// Promise-returning wrapper around child_process.exec.
const execAsync = promisify(exec);

/**
 * Tool that runs a shell command in `ctx.workDir` and reports stdout, stderr
 * and a numeric exit code. Failures are returned as a result, never thrown.
 */
export const shellExecTool: ToolDef<
  { command: string; timeout?: number },
  { stdout: string; stderr: string; exitCode: number }
> = {
  id: 'shell.exec',
  name: 'shell_exec',
  description: 'Execute a shell command and return stdout, stderr, and exit code. Use for running builds, tests, git commands, or any CLI tool.',
  inputSchema: z.object({
    command: z.string().describe('Shell command to execute'),
    timeout: z.number().optional().describe('Timeout in milliseconds (default: 30000)'),
  }),
  outputSchema: z.object({
    stdout: z.string(),
    stderr: z.string(),
    exitCode: z.number(),
  }),
  permission: 'exec',
  sideEffect: 'process',
  timeout: 60000,
  retries: 0,
  tags: ['shell', 'exec'],
  renderer: { icon: '⚡', color: 'green' },
  /**
   * @param input - The `command` to run and an optional per-call `timeout` (ms).
   * @param ctx - Execution context; uses `emit`, `workDir` and `signal`.
   * @returns Captured output (stdout capped at 50,000 chars, stderr at 10,000)
   *   and the exit code; `1` is reported when the process produced no numeric
   *   exit code (timeout, abort, output-buffer overflow, ...).
   */
  async execute(input, ctx) {
    ctx.emit(`Executing: ${input.command}`);
    const timeout = input.timeout ?? 30000;
    try {
      const { stdout, stderr } = await execAsync(input.command, {
        cwd: ctx.workDir,
        timeout,
        maxBuffer: 1024 * 1024 * 10, // 10MB
        signal: ctx.signal,
      });
      return { stdout: stdout.slice(0, 50000), stderr: stderr.slice(0, 10000), exitCode: 0 };
    } catch (err: unknown) {
      const failure = (err ?? {}) as { stdout?: unknown; stderr?: unknown; message?: unknown; code?: unknown };
      const out = typeof failure.stdout === 'string' ? failure.stdout : '';
      const errText = typeof failure.stderr === 'string' ? failure.stderr : '';
      const message = typeof failure.message === 'string' ? failure.message : '';

      // `code` is the numeric exit status for a normal non-zero exit, but a
      // string (e.g. 'ABORT_ERR') or null when the process was aborted, timed
      // out or overflowed the buffer. Only a number satisfies the output schema.
      const exitCode = typeof failure.code === 'number' ? failure.code : undefined;

      // Without a real exit status, stderr is often empty; fall back to the
      // error message so the caller can see why the command did not finish.
      const stderrOut = exitCode === undefined ? errText || message : errText;

      return {
        stdout: out.slice(0, 50000),
        stderr: stderrOut.slice(0, 10000),
        exitCode: exitCode ?? 1,
      };
    }
  },
};
src/tools/web/index.ts ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // ─── Web Fetch Tool ─────────────────────────────────────────────────────────
2
+ import { z } from 'zod';
3
+ import type { ToolDef } from '../../core/tools/index.js';
4
+
5
/**
 * Tool that performs a single HTTP request and returns the status, response
 * headers and the response body as text (truncated).
 */
export const webFetchTool: ToolDef<
  { url: string; method?: string; headers?: Record<string, string>; body?: string },
  { status: number; headers: Record<string, string>; body: string }
> = {
  id: 'web.fetch',
  name: 'web_fetch',
  description: 'Make an HTTP request to a URL. Returns status, headers, and body (truncated to 100KB). Useful for reading docs, APIs, and web pages.',
  inputSchema: z.object({
    url: z.string().url().describe('URL to fetch'),
    method: z.string().optional().describe('HTTP method (default: GET)'),
    headers: z.record(z.string()).optional().describe('Request headers'),
    body: z.string().optional().describe('Request body for POST/PUT'),
  }),
  outputSchema: z.object({
    status: z.number(),
    headers: z.record(z.string()),
    body: z.string(),
  }),
  permission: 'network',
  sideEffect: 'network',
  timeout: 30000,
  retries: 1,
  tags: ['web', 'network', 'http'],
  renderer: { icon: '🌐', color: 'cyan' },
  /**
   * @param input - Request `url` plus optional `method`, `headers` and `body`.
   * @param ctx - Execution context; uses `emit` and `signal`.
   * @returns Response status, headers flattened to a plain object, and the
   *   body text cut to the first 100,000 characters. Non-2xx statuses are
   *   returned, not thrown.
   */
  async execute(input, ctx) {
    ctx.emit(`Fetching: ${input.url}`);

    // fetch() only case-normalises a few method names (not e.g. "patch"), so
    // upper-case explicitly.
    const method = (input.method ?? 'GET').toUpperCase();
    // fetch() throws if a body accompanies GET or HEAD; drop it for those
    // methods instead of failing the whole call.
    const allowsBody = method !== 'GET' && method !== 'HEAD';

    const res = await fetch(input.url, {
      method,
      headers: input.headers,
      body: allowsBody ? input.body : undefined,
      signal: ctx.signal,
    });

    const body = await res.text();
    const headers: Record<string, string> = {};
    res.headers.forEach((value, key) => { headers[key] = value; });

    return {
      status: res.status,
      headers,
      body: body.slice(0, 100_000),
    };
  },
};
tsconfig.json ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "compilerOptions": {
3
+ "target": "ES2022",
4
+ "module": "ESNext",
5
+ "moduleResolution": "bundler",
6
+ "lib": ["ES2022"],
7
+ "outDir": "./dist",
8
+ "rootDir": "./src",
9
+ "strict": true,
10
+ "esModuleInterop": true,
11
+ "skipLibCheck": true,
12
+ "forceConsistentCasingInFileNames": true,
13
+ "resolveJsonModule": true,
14
+ "declaration": true,
15
+ "declarationMap": true,
16
+ "sourceMap": true,
17
+ "noUncheckedIndexedAccess": true,
18
+ "noUnusedLocals": true,
19
+ "noUnusedParameters": true,
20
+ "exactOptionalPropertyTypes": false,
21
+ "paths": {
22
+ "@core/*": ["./src/core/*"],
23
+ "@providers/*": ["./src/providers/*"],
24
+ "@cli/*": ["./src/cli/*"],
25
+ "@tools/*": ["./src/tools/*"],
26
+ "@skills/*": ["./src/skills/*"]
27
+ }
28
+ },
29
+ "include": ["src/**/*.ts"],
30
+ "exclude": ["node_modules", "dist"]
31
+ }