Spaces:
Running
Running
File size: 7,239 Bytes
fc69895 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 |
import { config } from "$lib/server/config";
import { logger } from "$lib/server/logger";
import type { EndpointMessage } from "../endpoints/endpoints";
import type { Route, RouteConfig, RouteSelection } from "./types";
import { getRoutes } from "./policy";
import { getApiToken } from "$lib/server/apiToken";
// Default number of trailing conversation entries kept by lastNTurns() when the
// caller does not pass an explicit count.
const DEFAULT_LAST_TURNS = 16;
/**
 * Trim a message by keeping its start and end and replacing the middle with a
 * single-character ellipsis ("…").
 *
 * Roughly 60% of the available budget goes to the start and the rest to the end.
 * The result never exceeds `maxLength` UTF-16 code units; it may be one or two
 * units shorter when a cut would otherwise land inside a surrogate pair.
 *
 * @param content - The message content to trim
 * @param maxLength - Maximum total length (including the ellipsis). Negative
 *   values are treated as 0; NaN means "no usable limit" and returns `content`
 *   unchanged.
 * @returns `content` itself when it already fits, otherwise start + "…" + end
 *   (or a plain truncation when there is no room for the ellipsis).
 */
function trimMiddle(content: string, maxLength: number): string {
	// A NaN limit (e.g. from an unparsable config value) carries no bound at all.
	// Without this guard the slices below would yield "…" + the whole content.
	if (Number.isNaN(maxLength)) return content;
	if (content.length <= maxLength) return content;

	// Clamp so a negative limit cannot become slice(0, -N), which would keep
	// almost everything instead of nothing.
	const limit = Math.max(0, Math.floor(maxLength));
	const indicator = "…";

	const isHighSurrogate = (code: number) => code >= 0xd800 && code <= 0xdbff;
	const isLowSurrogate = (code: number) => code >= 0xdc00 && code <= 0xdfff;
	// True when cutting right before `index` would separate a surrogate pair.
	const splitsPair = (index: number) =>
		index > 0 &&
		index < content.length &&
		isHighSurrogate(content.charCodeAt(index - 1)) &&
		isLowSurrogate(content.charCodeAt(index));

	// Leading `length` code units, backing off by one rather than leaving a lone high surrogate.
	const head = (length: number) => content.slice(0, splitsPair(length) ? length - 1 : length);

	const availableLength = limit - indicator.length;
	if (availableLength <= 0) {
		// No room even for the indicator: hard truncate
		return head(limit);
	}

	// Reserve more space for the start (typically contains context)
	const startLength = Math.ceil(availableLength * 0.6);
	const endLength = availableLength - startLength;

	// slice(-0) returns the entire string, so an empty end portion needs its own path
	if (endLength <= 0) {
		return head(availableLength) + indicator;
	}

	// Explicit start index for the tail; skip a leading lone low surrogate.
	const tailStart = content.length - endLength;
	const tail = content.slice(splitsPair(tailStart) ? tailStart + 1 : tailStart);

	return head(startLength) + indicator + tail;
}
// Prompt sent to the router model. The `{routes}` and `{conversation}` placeholders
// are substituted in toRouterPrompt() with JSON-serialized route descriptions and
// the recent conversation. The text is part of runtime behavior.
// NOTE(review): the wording presumably mirrors the prompt format the router model
// expects — confirm before editing it, even for grammar.
const PROMPT_TEMPLATE = `
You are a helpful assistant designed to find the best suited route.
You are provided with route description within <routes></routes> XML tags:
<routes>
{routes}
</routes>
<conversation>
{conversation}
</conversation>
Your task is to decide which route is best suit with user intent on the conversation in <conversation></conversation> XML tags.
Follow those instructions:
1. Use prior turns to choose the best route for the current message if needed.
2. If no route match the full conversation respond with other route {"route": "other"}.
3. Analyze the route descriptions and find the best match route for user latest intent.
4. Respond only with the route name that best matches the user's request, using the exact name in the <routes> block.
Based on your analysis, provide your response in the following JSON format if you decide to match any route:
{"route": "route_name"}
`.trim();
/**
 * Return the last `n` elements of `arr` as a new array.
 *
 * @param arr - Source array; a non-array value yields an empty array.
 * @param n - How many trailing elements to keep (fractions are floored).
 *   Zero, negative and NaN counts yield an empty array.
 * @returns A shallow copy of at most `n` trailing elements.
 */
function lastNTurns<T>(arr: T[], n = DEFAULT_LAST_TURNS) {
	if (!Array.isArray(arr)) return [] as T[];
	const count = Math.floor(n);
	// slice(-0) is slice(0) and would return everything; a negative count would
	// become a positive start index and drop leading elements instead.
	if (!(count > 0)) return [] as T[];
	return arr.slice(-count);
}
/**
 * Build the router prompt from the conversation and the available routes.
 *
 * Messages with empty or non-string content are dropped. Assistant messages and
 * all user messages except the most recent one are middle-trimmed to the
 * configured maximum lengths; the latest user message is kept in full. Only the
 * trailing entries selected by lastNTurns() are embedded in the prompt.
 *
 * @param messages - Conversation messages; `from` is used as the role.
 * @param routes - Candidate routes; only `name` and `description` are serialized.
 * @returns PROMPT_TEMPLATE with `{routes}` and `{conversation}` filled with JSON.
 */
function toRouterPrompt(messages: EndpointMessage[], routes: Route[]) {
	const simpleRoutes: RouteConfig[] = routes.map((r) => ({
		name: r.name,
		description: r.description,
	}));

	// An unparsable or negative setting falls back to the default instead of
	// passing NaN (or a negative number) to trimMiddle.
	const parseLimit = (raw: string): number => {
		const parsed = parseInt(raw, 10);
		return Number.isNaN(parsed) || parsed < 0 ? 1000 : parsed;
	};
	const maxAssistantLength = parseLimit(config.LLM_ROUTER_MAX_ASSISTANT_LENGTH || "1000");
	const maxPrevUserLength = parseLimit(config.LLM_ROUTER_MAX_PREV_USER_LENGTH || "1000");

	const convo = messages
		.map((m) => ({ role: m.from, content: m.content }))
		.filter((m) => typeof m.content === "string" && m.content.trim() !== "");

	// Find the last user message index to preserve its full content
	const lastUserIndex = convo.findLastIndex((m) => m.role === "user");

	const trimmedConvo = convo.map((m, idx) => {
		if (typeof m.content !== "string") return m;

		// Trim assistant messages to reduce routing prompt size and improve latency.
		// Start and end are kept for better context understanding.
		if (m.role === "assistant") {
			return {
				...m,
				content: trimMiddle(m.content, maxAssistantLength),
			};
		}

		// Trim previous user messages, but keep the latest user message full
		if (m.role === "user" && idx !== lastUserIndex) {
			return {
				...m,
				content: trimMiddle(m.content, maxPrevUserLength),
			};
		}

		return m;
	});

	const replacements: Record<string, string> = {
		routes: JSON.stringify(simpleRoutes),
		conversation: JSON.stringify(lastNTurns(trimmedConvo)),
	};

	// Single pass with a replacer function:
	//  - a function result is inserted literally, so "$&", "$$", "$`" or "$'" inside
	//    user/route text are not interpreted as replacement patterns;
	//  - inserted text is never re-scanned, so a "{conversation}" literal inside a
	//    route description cannot capture the conversation substitution.
	return PROMPT_TEMPLATE.replace(
		/\{(routes|conversation)\}/g,
		(placeholder, key: string) => replacements[key] ?? placeholder
	);
}
/**
 * Extract the route name from the router model's raw text output.
 *
 * Tries, in order:
 *  1. strict JSON (`{"route": "name"}`),
 *  2. a regex for a quoted `route` key/value pair anywhere in the text
 *     (handles surrounding prose or single quotes),
 *  3. JSON after replacing single quotes with double quotes.
 *
 * @param text - Raw model output.
 * @returns The trimmed, non-empty route name, or `undefined` when none is found.
 */
function parseRouteName(text: string): string | undefined {
	if (!text) return undefined;

	// Parse failures are expected for free-form model output; they just mean
	// "try the next strategy", so they are deliberately not logged.
	const routeFromJson = (candidate: string): string | undefined => {
		try {
			const parsed: unknown = JSON.parse(candidate);
			if (typeof parsed !== "object" || parsed === null) return undefined;
			const route = (parsed as { route?: unknown }).route;
			if (typeof route !== "string") return undefined;
			return route.trim() || undefined;
		} catch {
			return undefined;
		}
	};

	const strict = routeFromJson(text);
	if (strict) return strict;

	const m = text.match(/["']route["']\s*:\s*["']([^"']+)["']/);
	// Trim before testing so a whitespace-only capture is rejected like in the
	// JSON paths instead of being returned as "".
	const loose = m?.[1]?.trim();
	if (loose) return loose;

	return routeFromJson(text.replace(/'/g, '"'));
}
/**
 * Ask the Arch router model which route best fits the conversation.
 *
 * Sends a single non-streaming chat-completions request to
 * `LLM_ROUTER_ARCH_BASE_URL` and parses the route name from the reply.
 * The model answer "other" maps to `LLM_ROUTER_OTHER_ROUTE` (default
 * "casual_conversation"); a missing answer or a name that is not among the
 * known routes also resolves to "casual_conversation".
 *
 * This function does not throw for router failures: a missing base URL, a
 * non-2xx response, a network error or a timeout all resolve to
 * `{ routeName: "arch_router_failure" }`, with an `error` object where one is
 * available. Errors thrown by getRoutes() still propagate.
 *
 * @param messages - Conversation used to build the routing prompt.
 * @param traceId - Optional id attached to warning logs.
 * @param locals - Request locals passed to getApiToken() for the bearer token.
 * @returns The selected route name, plus error details on failure.
 */
export async function archSelectRoute(
	messages: EndpointMessage[],
	traceId: string | undefined,
	locals: App.Locals | undefined
): Promise<RouteSelection> {
	const routes = await getRoutes();

	const baseURL = (config.LLM_ROUTER_ARCH_BASE_URL || "").replace(/\/$/, "");
	const archModel = config.LLM_ROUTER_ARCH_MODEL || "router/omni";

	if (!baseURL) {
		logger.warn("LLM_ROUTER_ARCH_BASE_URL not set; routing will fail over to fallback.");
		return { routeName: "arch_router_failure" };
	}

	// Built only once we know a request will actually be sent.
	const prompt = toRouterPrompt(messages, routes);

	const headers: HeadersInit = {
		Authorization: `Bearer ${getApiToken(locals)}`,
		"Content-Type": "application/json",
	};
	const body = {
		model: archModel,
		messages: [{ role: "user", content: prompt }],
		temperature: 0,
		max_tokens: 16,
		stream: false,
	};

	// An unparsable or non-positive timeout would abort the request almost
	// immediately, so fall back to the default instead.
	const configuredTimeout = Number(config.LLM_ROUTER_ARCH_TIMEOUT_MS || 10000);
	const timeoutMs =
		Number.isFinite(configuredTimeout) && configuredTimeout > 0 ? configuredTimeout : 10000;

	const ctrl = new AbortController();
	const to = setTimeout(() => ctrl.abort(), timeoutMs);

	try {
		const resp = await fetch(`${baseURL}/chat/completions`, {
			method: "POST",
			headers,
			body: JSON.stringify(body),
			signal: ctrl.signal,
		});

		if (!resp.ok) {
			// Extract error message from response
			let errorMessage = `arch-router ${resp.status}`;
			try {
				const payload = (await resp.json()) as {
					error?: { message?: unknown };
					message?: unknown;
				} | null;
				// Try the OpenAI-style error format first, then a flat `message`
				const nested = payload?.error?.message;
				const flat = payload?.message;
				if (typeof nested === "string" && nested) {
					errorMessage = nested;
				} else if (typeof flat === "string" && flat) {
					errorMessage = flat;
				}
			} catch {
				// If the body cannot be read as JSON, use the status text
				errorMessage = resp.statusText || errorMessage;
			}
			logger.warn(
				{ status: resp.status, error: errorMessage, traceId },
				"[arch] router returned error"
			);
			return {
				routeName: "arch_router_failure",
				error: {
					message: errorMessage,
					statusCode: resp.status,
				},
			};
		}

		const data: { choices: { message: { content: string } }[] } = await resp.json();
		const text = (data?.choices?.[0]?.message?.content ?? "").toString().trim();
		const raw = parseRouteName(text);
		const other = config.LLM_ROUTER_OTHER_ROUTE || "casual_conversation";
		const chosen = raw === "other" ? other : raw || "casual_conversation";
		const exists = routes.some((r) => r.name === chosen);
		return { routeName: exists ? chosen : "casual_conversation" };
	} catch (e) {
		// Narrow instead of asserting: anything can be thrown, including non-Errors.
		const thrownMessage =
			typeof e === "object" && e !== null && "message" in e
				? (e as { message: unknown }).message
				: undefined;
		const message =
			typeof thrownMessage === "string" && thrownMessage ? thrownMessage : String(e);
		logger.warn({ err: String(e), traceId }, "arch router selection failed");
		// Return error with context but no status code (network/timeout errors)
		return {
			routeName: "arch_router_failure",
			error: { message },
		};
	} finally {
		// Cleared here rather than right after fetch() resolves, so the timeout
		// also covers reading the response body, not just receiving headers.
		clearTimeout(to);
	}
}
|