File size: 4,776 Bytes
fc93158 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 | import { Type } from "@sinclair/typebox";
import { optionalStringEnum, stringEnum } from "../schema/typebox.js";
const BROWSER_ACT_KINDS = [
"click",
"type",
"press",
"hover",
"drag",
"select",
"fill",
"resize",
"wait",
"evaluate",
"close",
] as const;
const BROWSER_TOOL_ACTIONS = [
"status",
"start",
"stop",
"profiles",
"tabs",
"open",
"focus",
"close",
"snapshot",
"screenshot",
"navigate",
"console",
"pdf",
"upload",
"dialog",
"act",
] as const;
const BROWSER_TARGETS = ["sandbox", "host", "node"] as const;
const BROWSER_SNAPSHOT_FORMATS = ["aria", "ai"] as const;
const BROWSER_SNAPSHOT_MODES = ["efficient"] as const;
const BROWSER_SNAPSHOT_REFS = ["role", "aria"] as const;
const BROWSER_IMAGE_TYPES = ["png", "jpeg"] as const;
// NOTE: Using a flattened object schema instead of Type.Union([Type.Object(...), ...])
// because Claude API on Vertex AI rejects nested anyOf schemas as invalid JSON Schema.
// The discriminator (kind) determines which properties are relevant; runtime validates.
const BrowserActSchema = Type.Object({
kind: stringEnum(BROWSER_ACT_KINDS),
// Common fields
targetId: Type.Optional(Type.String()),
ref: Type.Optional(Type.String()),
// click
doubleClick: Type.Optional(Type.Boolean()),
button: Type.Optional(Type.String()),
modifiers: Type.Optional(Type.Array(Type.String())),
// type
text: Type.Optional(Type.String()),
submit: Type.Optional(Type.Boolean()),
slowly: Type.Optional(Type.Boolean()),
// press
key: Type.Optional(Type.String()),
delayMs: Type.Optional(Type.Number()),
// drag
startRef: Type.Optional(Type.String()),
endRef: Type.Optional(Type.String()),
// select
values: Type.Optional(Type.Array(Type.String())),
// fill - use permissive array of objects
fields: Type.Optional(Type.Array(Type.Object({}, { additionalProperties: true }))),
// resize
width: Type.Optional(Type.Number()),
height: Type.Optional(Type.Number()),
// wait
timeMs: Type.Optional(Type.Number()),
selector: Type.Optional(Type.String()),
url: Type.Optional(Type.String()),
loadState: Type.Optional(Type.String()),
textGone: Type.Optional(Type.String()),
timeoutMs: Type.Optional(Type.Number()),
// evaluate
fn: Type.Optional(Type.String()),
});
// IMPORTANT: OpenAI function tool schemas must have a top-level `type: "object"`.
// A root-level `Type.Union([...])` compiles to `{ anyOf: [...] }` (no `type`),
// which OpenAI rejects ("Invalid schema ... type: None"). Keep this schema an object.
export const BrowserToolSchema = Type.Object({
action: stringEnum(BROWSER_TOOL_ACTIONS),
target: optionalStringEnum(BROWSER_TARGETS),
node: Type.Optional(Type.String()),
profile: Type.Optional(Type.String()),
targetUrl: Type.Optional(Type.String()),
url: Type.Optional(Type.String()),
targetId: Type.Optional(Type.String()),
limit: Type.Optional(Type.Number()),
maxChars: Type.Optional(Type.Number()),
mode: optionalStringEnum(BROWSER_SNAPSHOT_MODES),
snapshotFormat: optionalStringEnum(BROWSER_SNAPSHOT_FORMATS),
refs: optionalStringEnum(BROWSER_SNAPSHOT_REFS),
interactive: Type.Optional(Type.Boolean()),
compact: Type.Optional(Type.Boolean()),
depth: Type.Optional(Type.Number()),
selector: Type.Optional(Type.String()),
frame: Type.Optional(Type.String()),
labels: Type.Optional(Type.Boolean()),
fullPage: Type.Optional(Type.Boolean()),
ref: Type.Optional(Type.String()),
element: Type.Optional(Type.String()),
type: optionalStringEnum(BROWSER_IMAGE_TYPES),
level: Type.Optional(Type.String()),
paths: Type.Optional(Type.Array(Type.String())),
inputRef: Type.Optional(Type.String()),
timeoutMs: Type.Optional(Type.Number()),
accept: Type.Optional(Type.Boolean()),
promptText: Type.Optional(Type.String()),
// Legacy flattened act params (preferred: request={...})
kind: Type.Optional(stringEnum(BROWSER_ACT_KINDS)),
doubleClick: Type.Optional(Type.Boolean()),
button: Type.Optional(Type.String()),
modifiers: Type.Optional(Type.Array(Type.String())),
text: Type.Optional(Type.String()),
submit: Type.Optional(Type.Boolean()),
slowly: Type.Optional(Type.Boolean()),
key: Type.Optional(Type.String()),
delayMs: Type.Optional(Type.Number()),
startRef: Type.Optional(Type.String()),
endRef: Type.Optional(Type.String()),
values: Type.Optional(Type.Array(Type.String())),
fields: Type.Optional(Type.Array(Type.Object({}, { additionalProperties: true }))),
width: Type.Optional(Type.Number()),
height: Type.Optional(Type.Number()),
timeMs: Type.Optional(Type.Number()),
textGone: Type.Optional(Type.String()),
loadState: Type.Optional(Type.String()),
fn: Type.Optional(Type.String()),
request: Type.Optional(BrowserActSchema),
});
|