MCP: Image refs (#1987)
Browse files
* Add image reference support for MCP tool calls
Introduces image reference resolution and attachment for MCP tool arguments, allowing tools to accept lightweight image reference strings (e.g. 'latest', 'image_1') and receive resolved image payloads. Updates tool invocation and flow logic to use these references, and improves tool prompt instructions for image input handling.
* Enable multiple file uploads in ChatInput
* Refactor multimodal model selection logic
* Remove 'latest' image reference support
Eliminates handling of the 'latest' image reference in image resolver logic and updates related comments and prompts to only support 'image_1', 'image_2', etc. This simplifies image reference usage and clarifies instructions for tool input parameters.
* Refactor imageRefs to fileRefs for tool payloads
- .env +2 -2
- README.md +1 -1
- chart/env/dev.yaml +1 -1
- chart/env/prod.yaml +1 -1
- src/lib/components/chat/ChatInput.svelte +2 -1
- src/lib/server/router/endpoint.ts +5 -30
- src/lib/server/router/multimodal.ts +28 -0
- src/lib/server/textGeneration/mcp/fileRefs.ts +168 -0
- src/lib/server/textGeneration/mcp/routerResolution.ts +9 -6
- src/lib/server/textGeneration/mcp/runMcpFlow.ts +5 -1
- src/lib/server/textGeneration/mcp/toolInvocation.ts +8 -0
- src/lib/server/textGeneration/utils/toolPrompt.ts +7 -1
|
@@ -72,9 +72,9 @@ LLM_ROUTER_MAX_ASSISTANT_LENGTH=500
|
|
| 72 |
# Maximum length (in characters) for previous user messages sent to router (latest user message not trimmed, default 400)
|
| 73 |
LLM_ROUTER_MAX_PREV_USER_LENGTH=400
|
| 74 |
|
| 75 |
-
# Enable router multimodal
|
| 76 |
LLM_ROUTER_ENABLE_MULTIMODAL=
|
| 77 |
-
#
|
| 78 |
LLM_ROUTER_MULTIMODAL_MODEL=
|
| 79 |
|
| 80 |
# Enable router tool support (set to true to allow tool calling via router)
|
|
|
|
| 72 |
# Maximum length (in characters) for previous user messages sent to router (latest user message not trimmed, default 400)
|
| 73 |
LLM_ROUTER_MAX_PREV_USER_LENGTH=400
|
| 74 |
|
| 75 |
+
# Enable router multimodal handling (set to true to allow image inputs via router)
|
| 76 |
LLM_ROUTER_ENABLE_MULTIMODAL=
|
| 77 |
+
# Required when LLM_ROUTER_ENABLE_MULTIMODAL=true: id or name of the multimodal model to use for image requests
|
| 78 |
LLM_ROUTER_MULTIMODAL_MODEL=
|
| 79 |
|
| 80 |
# Enable router tool support (set to true to allow tool calling via router)
|
|
@@ -144,7 +144,7 @@ When you select Omni in the UI, Chat UI will:
|
|
| 144 |
|
| 145 |
Tool and multimodal shortcuts:
|
| 146 |
|
| 147 |
-
- Multimodal: If `LLM_ROUTER_ENABLE_MULTIMODAL=true` and the user sends an image, the router bypasses Arch and uses
|
| 148 |
- Tools: If `LLM_ROUTER_ENABLE_TOOLS=true` and the user has at least one MCP server enabled, the router bypasses Arch and uses `LLM_ROUTER_TOOLS_MODEL`. If that model is missing or misconfigured, it falls back to Arch routing. Route name: `agentic`.
|
| 149 |
|
| 150 |
### MCP Tools (Optional)
|
|
|
|
| 144 |
|
| 145 |
Tool and multimodal shortcuts:
|
| 146 |
|
| 147 |
+
- Multimodal: If `LLM_ROUTER_ENABLE_MULTIMODAL=true` and the user sends an image, the router bypasses Arch and uses the model specified in `LLM_ROUTER_MULTIMODAL_MODEL`. Route name: `multimodal`.
|
| 148 |
- Tools: If `LLM_ROUTER_ENABLE_TOOLS=true` and the user has at least one MCP server enabled, the router bypasses Arch and uses `LLM_ROUTER_TOOLS_MODEL`. If that model is missing or misconfigured, it falls back to Arch routing. Route name: `agentic`.
|
| 149 |
|
| 150 |
### MCP Tools (Optional)
|
|
@@ -67,7 +67,7 @@ envVars:
|
|
| 67 |
LLM_ROUTER_OTHER_ROUTE: "casual_conversation"
|
| 68 |
LLM_ROUTER_ARCH_TIMEOUT_MS: "10000"
|
| 69 |
LLM_ROUTER_ENABLE_MULTIMODAL: "true"
|
| 70 |
-
LLM_ROUTER_MULTIMODAL_MODEL: "Qwen/Qwen3-VL-235B-A22B-
|
| 71 |
LLM_ROUTER_ENABLE_TOOLS: "true"
|
| 72 |
LLM_ROUTER_TOOLS_MODEL: "moonshotai/Kimi-K2-Instruct-0905"
|
| 73 |
MCP_SERVERS: >
|
|
|
|
| 67 |
LLM_ROUTER_OTHER_ROUTE: "casual_conversation"
|
| 68 |
LLM_ROUTER_ARCH_TIMEOUT_MS: "10000"
|
| 69 |
LLM_ROUTER_ENABLE_MULTIMODAL: "true"
|
| 70 |
+
LLM_ROUTER_MULTIMODAL_MODEL: "Qwen/Qwen3-VL-235B-A22B-Instruct"
|
| 71 |
LLM_ROUTER_ENABLE_TOOLS: "true"
|
| 72 |
LLM_ROUTER_TOOLS_MODEL: "moonshotai/Kimi-K2-Instruct-0905"
|
| 73 |
MCP_SERVERS: >
|
|
@@ -77,7 +77,7 @@ envVars:
|
|
| 77 |
LLM_ROUTER_OTHER_ROUTE: "casual_conversation"
|
| 78 |
LLM_ROUTER_ARCH_TIMEOUT_MS: "10000"
|
| 79 |
LLM_ROUTER_ENABLE_MULTIMODAL: "true"
|
| 80 |
-
LLM_ROUTER_MULTIMODAL_MODEL: "Qwen/Qwen3-VL-
|
| 81 |
LLM_ROUTER_ENABLE_TOOLS: "true"
|
| 82 |
LLM_ROUTER_TOOLS_MODEL: "moonshotai/Kimi-K2-Instruct-0905"
|
| 83 |
MCP_SERVERS: >
|
|
|
|
| 77 |
LLM_ROUTER_OTHER_ROUTE: "casual_conversation"
|
| 78 |
LLM_ROUTER_ARCH_TIMEOUT_MS: "10000"
|
| 79 |
LLM_ROUTER_ENABLE_MULTIMODAL: "true"
|
| 80 |
+
LLM_ROUTER_MULTIMODAL_MODEL: "Qwen/Qwen3-VL-235B-A22B-Instruct"
|
| 81 |
LLM_ROUTER_ENABLE_TOOLS: "true"
|
| 82 |
LLM_ROUTER_TOOLS_MODEL: "moonshotai/Kimi-K2-Instruct-0905"
|
| 83 |
MCP_SERVERS: >
|
|
@@ -241,6 +241,7 @@
|
|
| 241 |
class="absolute hidden size-0"
|
| 242 |
aria-label="Upload file"
|
| 243 |
type="file"
|
|
|
|
| 244 |
onchange={onFileChange}
|
| 245 |
onclick={(e) => {
|
| 246 |
if (requireAuthUser()) {
|
|
@@ -274,7 +275,7 @@
|
|
| 274 |
onSelect={() => openFilePickerImage()}
|
| 275 |
>
|
| 276 |
<CarbonImage class="size-4 opacity-90 dark:opacity-80" />
|
| 277 |
-
Add image
|
| 278 |
</DropdownMenu.Item>
|
| 279 |
{/if}
|
| 280 |
|
|
|
|
| 241 |
class="absolute hidden size-0"
|
| 242 |
aria-label="Upload file"
|
| 243 |
type="file"
|
| 244 |
+
multiple
|
| 245 |
onchange={onFileChange}
|
| 246 |
onclick={(e) => {
|
| 247 |
if (requireAuthUser()) {
|
|
|
|
| 275 |
onSelect={() => openFilePickerImage()}
|
| 276 |
>
|
| 277 |
<CarbonImage class="size-4 opacity-90 dark:opacity-80" />
|
| 278 |
+
Add image(s)
|
| 279 |
</DropdownMenu.Item>
|
| 280 |
{/if}
|
| 281 |
|
|
@@ -18,6 +18,7 @@ import {
|
|
| 18 |
pickToolsCapableModel,
|
| 19 |
ROUTER_TOOLS_ROUTE,
|
| 20 |
} from "./toolsRoute";
|
|
|
|
| 21 |
|
| 22 |
const REASONING_BLOCK_REGEX = /<think>[\s\S]*?(?:<\/think>|$)/g;
|
| 23 |
|
|
@@ -176,43 +177,17 @@ export async function makeRouterEndpoint(routerModel: ProcessedModel): Promise<E
|
|
| 176 |
for await (const ev of gen) yield ev;
|
| 177 |
}
|
| 178 |
|
| 179 |
-
|
|
|
|
| 180 |
try {
|
| 181 |
const all = await getModels();
|
| 182 |
-
|
| 183 |
-
// Check if a specific multimodal model is configured via env variable
|
| 184 |
-
const preferredModelId = config.LLM_ROUTER_MULTIMODAL_MODEL;
|
| 185 |
-
if (preferredModelId) {
|
| 186 |
-
const preferredModel = all?.find(
|
| 187 |
-
(m) => (m.id === preferredModelId || m.name === preferredModelId) && m.multimodal
|
| 188 |
-
);
|
| 189 |
-
if (preferredModel) {
|
| 190 |
-
logger.info(
|
| 191 |
-
{ model: preferredModel.id ?? preferredModel.name },
|
| 192 |
-
"[router] using configured multimodal model"
|
| 193 |
-
);
|
| 194 |
-
return preferredModel.id ?? preferredModel.name;
|
| 195 |
-
}
|
| 196 |
-
logger.warn(
|
| 197 |
-
{ configuredModel: preferredModelId },
|
| 198 |
-
"[router] configured multimodal model not found or not multimodal, falling back to first available"
|
| 199 |
-
);
|
| 200 |
-
}
|
| 201 |
-
|
| 202 |
-
// Fallback to first multimodal model
|
| 203 |
-
const first = all?.find((m) => !m.isRouter && m.multimodal);
|
| 204 |
-
return first?.id ?? first?.name;
|
| 205 |
} catch (e) {
|
| 206 |
logger.warn({ err: String(e) }, "[router] failed to load models for multimodal lookup");
|
| 207 |
-
return undefined;
|
| 208 |
}
|
| 209 |
-
}
|
| 210 |
-
|
| 211 |
-
if (routerMultimodalEnabled && hasImageInput) {
|
| 212 |
-
const multimodalCandidate = await findFirstMultimodalCandidateId();
|
| 213 |
if (!multimodalCandidate) {
|
| 214 |
throw new Error(
|
| 215 |
-
"
|
| 216 |
);
|
| 217 |
}
|
| 218 |
|
|
|
|
| 18 |
pickToolsCapableModel,
|
| 19 |
ROUTER_TOOLS_ROUTE,
|
| 20 |
} from "./toolsRoute";
|
| 21 |
+
import { getConfiguredMultimodalModelId } from "./multimodal";
|
| 22 |
|
| 23 |
const REASONING_BLOCK_REGEX = /<think>[\s\S]*?(?:<\/think>|$)/g;
|
| 24 |
|
|
|
|
| 177 |
for await (const ev of gen) yield ev;
|
| 178 |
}
|
| 179 |
|
| 180 |
+
if (routerMultimodalEnabled && hasImageInput) {
|
| 181 |
+
let multimodalCandidate: string | undefined;
|
| 182 |
try {
|
| 183 |
const all = await getModels();
|
| 184 |
+
multimodalCandidate = getConfiguredMultimodalModelId(all);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 185 |
} catch (e) {
|
| 186 |
logger.warn({ err: String(e) }, "[router] failed to load models for multimodal lookup");
|
|
|
|
| 187 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
| 188 |
if (!multimodalCandidate) {
|
| 189 |
throw new Error(
|
| 190 |
+
"Router multimodal is enabled but LLM_ROUTER_MULTIMODAL_MODEL is not correctly configured. Remove the image or configure a multimodal model via LLM_ROUTER_MULTIMODAL_MODEL."
|
| 191 |
);
|
| 192 |
}
|
| 193 |
|
|
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import { config } from "$lib/server/config";
|
| 2 |
+
import type { ProcessedModel } from "../models";
|
| 3 |
+
|
| 4 |
+
/**
 * Looks up the model named by `LLM_ROUTER_MULTIMODAL_MODEL` in the given list.
 *
 * The configured value is trimmed and compared against each model's `id` and
 * `name`. A model is only returned when it is flagged `multimodal` and is not
 * a router alias (`isRouter`).
 *
 * @param models - Candidate models; may be `undefined` or empty.
 * @returns The first matching model, or `undefined` when the setting is
 *   empty, the list is empty/missing, or nothing qualifies.
 */
export function findConfiguredMultimodalModel(
	models: ProcessedModel[] | undefined
): ProcessedModel | undefined {
	const wanted = (config.LLM_ROUTER_MULTIMODAL_MODEL || "").trim();
	if (wanted === "" || !models || models.length === 0) {
		return undefined;
	}

	for (const model of models) {
		const sameIdentity = model.id === wanted || model.name === wanted;
		if (sameIdentity && !model.isRouter && model.multimodal) {
			return model;
		}
	}
	return undefined;
}
|
| 22 |
+
|
| 23 |
+
/**
 * Convenience wrapper around `findConfiguredMultimodalModel` that yields an
 * identifier instead of the model object.
 *
 * @param models - Candidate models; may be `undefined` or empty.
 * @returns The matched model's `id`, falling back to its `name` when `id` is
 *   null/undefined; `undefined` when no configured multimodal model is found.
 */
export function getConfiguredMultimodalModelId(
	models: ProcessedModel[] | undefined
): string | undefined {
	const match = findConfiguredMultimodalModel(models);
	if (!match) {
		return undefined;
	}
	return match.id ?? match.name;
}
|
|
@@ -0,0 +1,168 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import type { EndpointMessage } from "../../endpoints/endpoints";
|
| 2 |
+
|
| 3 |
+
/** File content handed to an MCP tool once a short reference has been resolved. */
export type FileRefPayload = {
	/** File name taken from the message attachment. */
	name: string;
	/** MIME type taken from the message attachment. */
	mime: string;
	/** Attachment value; it is embedded as the base64 part of `data:` URLs. */
	base64: string;
};

/** Describes one family of references (e.g. `image_1`, `image_2`, ...). */
export type RefKind = {
	/** Reference prefix; refs have the form `<prefix>_<n>` with a 1-based `n`. */
	prefix: string;
	/** Decides whether a file with the given MIME type belongs to this kind. */
	matches: (mime: string) => boolean;
	/** Optional custom conversion of a payload to a `data:` URL. */
	toDataUrl?: (payload: FileRefPayload) => string;
};

/** A resolved payload together with the kind that produced it. */
export type ResolvedFileRef = FileRefPayload & { refKind: RefKind };

/** Maps a reference string to its payload, or `undefined` when it does not resolve. */
export type FileRefResolver = (ref: string) => ResolvedFileRef | undefined;

const IMAGE_REF_KIND: RefKind = {
	prefix: "image",
	matches: (mime) => typeof mime === "string" && mime.startsWith("image/"),
	toDataUrl: (payload) => `data:${payload.mime};base64,${payload.base64}`,
};

const DEFAULT_REF_KINDS: RefKind[] = [IMAGE_REF_KIND];

/** Escapes regular-expression metacharacters so `text` is matched literally. */
function escapeRegExp(text: string): string {
	return text.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
}

/**
 * Build a resolver that maps short ref strings (e.g. "image_1") to the
 * corresponding file payload of the newest user message that contains at
 * least one file of the allowed kinds.
 *
 * Files are numbered per kind, starting at 1, in the order they appear in
 * that message. Refs are matched after trimming and lower-casing, so
 * " IMAGE_2 " resolves like "image_2". Kind prefixes are matched literally
 * (regex metacharacters are escaped) and case-insensitively.
 *
 * @param messages - Conversation messages, oldest first.
 * @param refKinds - Kinds to expose; defaults to images only.
 * @returns A resolver, or `undefined` when `refKinds` is empty or no user
 *   message carries a matching file.
 */
export function buildFileRefResolver(
	messages: EndpointMessage[],
	refKinds: RefKind[] = DEFAULT_REF_KINDS
): FileRefResolver | undefined {
	if (!Array.isArray(refKinds) || refKinds.length === 0) return undefined;

	// First kind that accepts the MIME type wins, mirroring declaration order.
	const kindOf = (mime: string | undefined): RefKind | undefined =>
		refKinds.find((kind) => kind.matches(mime ?? ""));

	// Walk backwards to find the newest user message with a matching file.
	let lastUserWithFiles: EndpointMessage | undefined;
	for (let i = messages.length - 1; i >= 0; i -= 1) {
		const msg = messages[i];
		if (msg.from !== "user") continue;
		if ((msg.files ?? []).some((file) => kindOf(file?.mime) !== undefined)) {
			lastUserWithFiles = msg;
			break;
		}
	}

	if (!lastUserWithFiles) return undefined;

	// Bucket matched files by ref kind while preserving order within the message.
	const buckets = new Map<RefKind, FileRefPayload[]>();
	for (const file of lastUserWithFiles.files ?? []) {
		const mime = file?.mime ?? "";
		const kind = kindOf(mime);
		if (!kind) continue;
		const bucket = buckets.get(kind) ?? [];
		bucket.push({ name: file.name, mime, base64: file.value });
		buckets.set(kind, bucket);
	}

	if (buckets.size === 0) return undefined;

	// Compile one pattern per kind up front instead of on every lookup. The
	// prefix is lower-cased (the ref is lower-cased too) and escaped so that
	// characters such as "." cannot act as wildcards.
	const patterns = refKinds.map((kind) => ({
		kind,
		pattern: new RegExp(`^${escapeRegExp(kind.prefix.toLowerCase())}_(\\d+)$`),
	}));

	return (ref) => {
		if (!ref || typeof ref !== "string") return undefined;
		const normalized = ref.trim().toLowerCase();
		for (const { kind, pattern } of patterns) {
			const match = pattern.exec(normalized);
			if (!match) continue;
			// Refs are 1-based; out-of-range indexes simply find nothing.
			const payload = buckets.get(kind)?.[Number(match[1]) - 1];
			if (payload) return { ...payload, refKind: kind };
		}
		return undefined;
	};
}

/**
 * Image-only variant of `buildFileRefResolver`.
 *
 * @param messages - Conversation messages, oldest first.
 * @returns A resolver for `image_<n>` refs, or `undefined` when no user
 *   message contains an image.
 */
export function buildImageRefResolver(messages: EndpointMessage[]): FileRefResolver | undefined {
	return buildFileRefResolver(messages, [IMAGE_REF_KIND]);
}
|
| 91 |
+
|
| 92 |
+
/** Tells `attachFileRefsToArgs` how to hydrate a ref found under given keys. */
type FieldRule = {
	/** Argument keys this rule applies to. */
	keys: string[];
	/**
	 * `attachPayload` adds a sibling object with the file content;
	 * `replaceWithDataUrl` overwrites the ref string with a `data:` URL.
	 */
	action: "attachPayload" | "replaceWithDataUrl";
	/** Sibling key used by `attachPayload`; falls back to "file". */
	attachKey?: string;
	allowedPrefixes?: string[]; // limit to specific ref kinds (e.g. ["image"])
};

const DEFAULT_FIELD_RULES: FieldRule[] = [
	{
		keys: ["image_ref"],
		action: "attachPayload",
		attachKey: "image",
		allowedPrefixes: ["image"],
	},
	{
		keys: ["input_image"],
		action: "replaceWithDataUrl",
		allowedPrefixes: ["image"],
	},
];

/**
 * Walk tool args (recursing through nested objects and arrays) and hydrate
 * string values stored under keys that a field rule recognizes.
 *
 * The object is mutated in place:
 * - `attachPayload`: sets `obj[attachKey]` to `{ name, mime, base64 }` unless
 *   that key already holds a non-array object; the ref string itself is kept.
 * - `replaceWithDataUrl`: replaces the ref string with a `data:` URL, using
 *   the ref kind's `toDataUrl` when it provides one.
 *
 * The resolver is only consulted for keys covered by a rule, so unrelated
 * (and potentially very large) string arguments are never passed to it.
 *
 * @param argsObj - Tool arguments to hydrate; mutated in place.
 * @param resolveRef - Ref resolver; when omitted the function does nothing.
 * @param fieldRules - Rules to apply; defaults to the image rules.
 */
export function attachFileRefsToArgs(
	argsObj: Record<string, unknown>,
	resolveRef?: FileRefResolver,
	fieldRules: FieldRule[] = DEFAULT_FIELD_RULES
): void {
	if (!resolveRef) return;

	const defaultToDataUrl = (p: FileRefPayload): string => `data:${p.mime};base64,${p.base64}`;

	const visit = (node: unknown): void => {
		if (!node || typeof node !== "object") return;
		if (Array.isArray(node)) {
			for (const item of node) visit(item);
			return;
		}

		const obj = node as Record<string, unknown>;
		// Object.entries snapshots the keys, so keys added below are not revisited.
		for (const [key, value] of Object.entries(obj)) {
			if (typeof value !== "string") {
				visit(value);
				continue;
			}

			// Check for a rule first: it is cheap, and it keeps strings under
			// unrelated keys away from the resolver entirely.
			const rule = fieldRules.find((r) => r.keys.includes(key));
			if (!rule) continue;

			const resolved = resolveRef(value);
			if (!resolved) continue;
			if (rule.allowedPrefixes && !rule.allowedPrefixes.includes(resolved.refKind.prefix)) continue;

			if (rule.action === "attachPayload") {
				const targetKey = rule.attachKey ?? "file";
				const existing = obj[targetKey];
				const holdsObject =
					typeof existing === "object" && existing !== null && !Array.isArray(existing);
				// Do not clobber an object the caller already supplied.
				if (!holdsObject) {
					obj[targetKey] = {
						name: resolved.name,
						mime: resolved.mime,
						base64: resolved.base64,
					};
				}
			} else if (rule.action === "replaceWithDataUrl") {
				const toUrl = resolved.refKind.toDataUrl ?? defaultToDataUrl;
				obj[key] = toUrl(resolved);
			}
		}
	};

	visit(argsObj);
}
|
|
@@ -7,6 +7,7 @@ import {
|
|
| 7 |
pickToolsCapableModel,
|
| 8 |
ROUTER_TOOLS_ROUTE,
|
| 9 |
} from "$lib/server/router/toolsRoute";
|
|
|
|
| 10 |
import type { EndpointMessage } from "../../endpoints/endpoints";
|
| 11 |
import { stripReasoningFromMessageForRouting } from "../utils/routing";
|
| 12 |
import type { ProcessedModel } from "../../models";
|
|
@@ -48,15 +49,17 @@ export async function resolveRouterTarget({
|
|
| 48 |
const allModels = mod.models as ProcessedModel[];
|
| 49 |
|
| 50 |
if (hasImageInput) {
|
| 51 |
-
const multimodalCandidate =
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 55 |
targetModel = multimodalCandidate;
|
| 56 |
candidateModelId = multimodalCandidate.id ?? multimodalCandidate.name;
|
| 57 |
resolvedRoute = "multimodal";
|
| 58 |
-
} else {
|
| 59 |
-
runMcp = false;
|
| 60 |
}
|
| 61 |
} else {
|
| 62 |
// If tools are enabled and at least one MCP server is active, prefer a tools-capable model
|
|
|
|
| 7 |
pickToolsCapableModel,
|
| 8 |
ROUTER_TOOLS_ROUTE,
|
| 9 |
} from "$lib/server/router/toolsRoute";
|
| 10 |
+
import { findConfiguredMultimodalModel } from "$lib/server/router/multimodal";
|
| 11 |
import type { EndpointMessage } from "../../endpoints/endpoints";
|
| 12 |
import { stripReasoningFromMessageForRouting } from "../utils/routing";
|
| 13 |
import type { ProcessedModel } from "../../models";
|
|
|
|
| 49 |
const allModels = mod.models as ProcessedModel[];
|
| 50 |
|
| 51 |
if (hasImageInput) {
|
| 52 |
+
const multimodalCandidate = findConfiguredMultimodalModel(allModels);
|
| 53 |
+
if (!multimodalCandidate) {
|
| 54 |
+
runMcp = false;
|
| 55 |
+
logger.warn(
|
| 56 |
+
{ configuredModel: config.LLM_ROUTER_MULTIMODAL_MODEL },
|
| 57 |
+
"[mcp] multimodal input but configured model missing or invalid; skipping MCP route"
|
| 58 |
+
);
|
| 59 |
+
} else {
|
| 60 |
targetModel = multimodalCandidate;
|
| 61 |
candidateModelId = multimodalCandidate.id ?? multimodalCandidate.name;
|
| 62 |
resolvedRoute = "multimodal";
|
|
|
|
|
|
|
| 63 |
}
|
| 64 |
} else {
|
| 65 |
// If tools are enabled and at least one MCP server is active, prefer a tools-capable model
|
|
@@ -1,6 +1,5 @@
|
|
| 1 |
import { config } from "$lib/server/config";
|
| 2 |
import { MessageUpdateType, type MessageUpdate } from "$lib/types/MessageUpdate";
|
| 3 |
-
import type { EndpointMessage } from "../../endpoints/endpoints";
|
| 4 |
import { getMcpServers } from "$lib/server/mcp/registry";
|
| 5 |
import { isValidUrl } from "$lib/server/urlSafety";
|
| 6 |
import { resetMcpToolsCache } from "$lib/server/mcp/tools";
|
|
@@ -14,11 +13,13 @@ import type {
|
|
| 14 |
} from "openai/resources/chat/completions";
|
| 15 |
import type { Stream } from "openai/streaming";
|
| 16 |
import { buildToolPreprompt } from "../utils/toolPrompt";
|
|
|
|
| 17 |
import { resolveRouterTarget } from "./routerResolution";
|
| 18 |
import { executeToolCalls, type NormalizedToolCall } from "./toolInvocation";
|
| 19 |
import { drainPool } from "$lib/server/mcp/clientPool";
|
| 20 |
import type { TextGenerationContext } from "../types";
|
| 21 |
import { hasAuthHeader, isStrictHfMcpLogin, hasNonEmptyToken } from "$lib/server/mcp/hf";
|
|
|
|
| 22 |
|
| 23 |
export type RunMcpFlowContext = Pick<
|
| 24 |
TextGenerationContext,
|
|
@@ -200,6 +201,8 @@ export async function* runMcpFlow({
|
|
| 200 |
// If anything goes wrong reading the flag, proceed (previous behavior)
|
| 201 |
}
|
| 202 |
|
|
|
|
|
|
|
| 203 |
const hasImageInput = messages.some((msg) =>
|
| 204 |
(msg.files ?? []).some(
|
| 205 |
(file) => typeof file?.mime === "string" && file.mime.startsWith("image/")
|
|
@@ -599,6 +602,7 @@ export async function* runMcpFlow({
|
|
| 599 |
mapping,
|
| 600 |
servers,
|
| 601 |
parseArgs,
|
|
|
|
| 602 |
toPrimitive,
|
| 603 |
processToolOutput,
|
| 604 |
abortSignal,
|
|
|
|
| 1 |
import { config } from "$lib/server/config";
|
| 2 |
import { MessageUpdateType, type MessageUpdate } from "$lib/types/MessageUpdate";
|
|
|
|
| 3 |
import { getMcpServers } from "$lib/server/mcp/registry";
|
| 4 |
import { isValidUrl } from "$lib/server/urlSafety";
|
| 5 |
import { resetMcpToolsCache } from "$lib/server/mcp/tools";
|
|
|
|
| 13 |
} from "openai/resources/chat/completions";
|
| 14 |
import type { Stream } from "openai/streaming";
|
| 15 |
import { buildToolPreprompt } from "../utils/toolPrompt";
|
| 16 |
+
import type { EndpointMessage } from "../../endpoints/endpoints";
|
| 17 |
import { resolveRouterTarget } from "./routerResolution";
|
| 18 |
import { executeToolCalls, type NormalizedToolCall } from "./toolInvocation";
|
| 19 |
import { drainPool } from "$lib/server/mcp/clientPool";
|
| 20 |
import type { TextGenerationContext } from "../types";
|
| 21 |
import { hasAuthHeader, isStrictHfMcpLogin, hasNonEmptyToken } from "$lib/server/mcp/hf";
|
| 22 |
+
import { buildImageRefResolver } from "./fileRefs";
|
| 23 |
|
| 24 |
export type RunMcpFlowContext = Pick<
|
| 25 |
TextGenerationContext,
|
|
|
|
| 201 |
// If anything goes wrong reading the flag, proceed (previous behavior)
|
| 202 |
}
|
| 203 |
|
| 204 |
+
const resolveFileRef = buildImageRefResolver(messages);
|
| 205 |
+
|
| 206 |
const hasImageInput = messages.some((msg) =>
|
| 207 |
(msg.files ?? []).some(
|
| 208 |
(file) => typeof file?.mime === "string" && file.mime.startsWith("image/")
|
|
|
|
| 602 |
mapping,
|
| 603 |
servers,
|
| 604 |
parseArgs,
|
| 605 |
+
resolveFileRef,
|
| 606 |
toPrimitive,
|
| 607 |
processToolOutput,
|
| 608 |
abortSignal,
|
|
@@ -8,6 +8,7 @@ import type { McpToolMapping } from "$lib/server/mcp/tools";
|
|
| 8 |
import type { McpServerConfig } from "$lib/server/mcp/httpClient";
|
| 9 |
import { callMcpTool, type McpToolTextResponse } from "$lib/server/mcp/httpClient";
|
| 10 |
import { getClient } from "$lib/server/mcp/clientPool";
|
|
|
|
| 11 |
import type { Client } from "@modelcontextprotocol/sdk/client";
|
| 12 |
|
| 13 |
export type Primitive = string | number | boolean;
|
|
@@ -29,6 +30,7 @@ export interface ExecuteToolCallsParams {
|
|
| 29 |
mapping: Record<string, McpToolMapping>;
|
| 30 |
servers: McpServerConfig[];
|
| 31 |
parseArgs: (raw: unknown) => Record<string, unknown>;
|
|
|
|
| 32 |
toPrimitive: (value: unknown) => Primitive | undefined;
|
| 33 |
processToolOutput: (text: string) => {
|
| 34 |
annotated: string;
|
|
@@ -63,6 +65,7 @@ export async function* executeToolCalls({
|
|
| 63 |
mapping,
|
| 64 |
servers,
|
| 65 |
parseArgs,
|
|
|
|
| 66 |
toPrimitive,
|
| 67 |
processToolOutput,
|
| 68 |
abortSignal,
|
|
@@ -89,6 +92,11 @@ export async function* executeToolCalls({
|
|
| 89 |
const prim = toPrimitive(v);
|
| 90 |
if (prim !== undefined) paramsClean[k] = prim;
|
| 91 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 92 |
return { call, argsObj, paramsClean, uuid: randomUUID() };
|
| 93 |
});
|
| 94 |
|
|
|
|
| 8 |
import type { McpServerConfig } from "$lib/server/mcp/httpClient";
|
| 9 |
import { callMcpTool, type McpToolTextResponse } from "$lib/server/mcp/httpClient";
|
| 10 |
import { getClient } from "$lib/server/mcp/clientPool";
|
| 11 |
+
import { attachFileRefsToArgs, type FileRefResolver } from "./fileRefs";
|
| 12 |
import type { Client } from "@modelcontextprotocol/sdk/client";
|
| 13 |
|
| 14 |
export type Primitive = string | number | boolean;
|
|
|
|
| 30 |
mapping: Record<string, McpToolMapping>;
|
| 31 |
servers: McpServerConfig[];
|
| 32 |
parseArgs: (raw: unknown) => Record<string, unknown>;
|
| 33 |
+
resolveFileRef?: FileRefResolver;
|
| 34 |
toPrimitive: (value: unknown) => Primitive | undefined;
|
| 35 |
processToolOutput: (text: string) => {
|
| 36 |
annotated: string;
|
|
|
|
| 65 |
mapping,
|
| 66 |
servers,
|
| 67 |
parseArgs,
|
| 68 |
+
resolveFileRef,
|
| 69 |
toPrimitive,
|
| 70 |
processToolOutput,
|
| 71 |
abortSignal,
|
|
|
|
| 92 |
const prim = toPrimitive(v);
|
| 93 |
if (prim !== undefined) paramsClean[k] = prim;
|
| 94 |
}
|
| 95 |
+
// Attach any resolved image payloads _after_ computing paramsClean so that
|
| 96 |
+
// logging / status updates continue to show only the lightweight primitive
|
| 97 |
+
// arguments (e.g. "image_1") while the full data: URLs or image blobs are
|
| 98 |
+
// only sent to the MCP tool server.
|
| 99 |
+
attachFileRefsToArgs(argsObj, resolveFileRef);
|
| 100 |
return { call, argsObj, paramsClean, uuid: randomUUID() };
|
| 101 |
});
|
| 102 |
|
|
@@ -11,5 +11,11 @@ export function buildToolPreprompt(tools: OpenAiTool[]): string {
|
|
| 11 |
month: "long",
|
| 12 |
day: "numeric",
|
| 13 |
});
|
| 14 |
-
return
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 15 |
}
|
|
|
|
| 11 |
month: "long",
|
| 12 |
day: "numeric",
|
| 13 |
});
|
| 14 |
+
return [
|
| 15 |
+
`You can use the following tools if helpful: ${names.join(", ")}.`,
|
| 16 |
+
`Today's date: ${currentDate}.`,
|
| 17 |
+
`If a tool generates an image, you can inline it directly: .`,
|
| 18 |
+
`If a tool needs to operate on an image, set its image input parameter (for example, "input_image") to an image reference string.`,
|
| 19 |
+
`Use "image_1", "image_2", etc. to point to a specific image from a user message with images. You can also reuse a direct image URL from a prior tool result instead of pasting new base64 data.`,
|
| 20 |
+
].join(" ");
|
| 21 |
}
|