Spaces:

mishig
/

chat-ui

Sleeping

App Files Files Community

victor HF Staff committed on Oct 15, 2025

Commit

10f34e5

unverified ·

1 Parent(s): b33184c

Add message trimming for router prompt construction (#1935)

Browse files

Introduces configurable maximum lengths for assistant and previous user messages in the router prompt via new .env variables. Implements a trimMiddle function to keep the start and end of long messages, improving prompt efficiency and latency while preserving relevant context for route selection.

Files changed (2) hide show

.env +4 -0
src/lib/server/router/arch.ts +67 -1

.env CHANGED Viewed

@@ -62,6 +62,10 @@ LLM_ROUTER_OTHER_ROUTE=casual_conversation
 LLM_ROUTER_FALLBACK_MODEL=
 # Arch selection timeout in milliseconds (default 10000)
 LLM_ROUTER_ARCH_TIMEOUT_MS=10000
 # Enable router multimodal fallback (set to true to allow image inputs via router)
 LLM_ROUTER_ENABLE_MULTIMODAL=false

 LLM_ROUTER_FALLBACK_MODEL=
 # Arch selection timeout in milliseconds (default 10000)
 LLM_ROUTER_ARCH_TIMEOUT_MS=10000
+# Maximum length (in characters) for assistant messages sent to router for route selection (default 500)
+LLM_ROUTER_MAX_ASSISTANT_LENGTH=500
+# Maximum length (in characters) for previous user messages sent to router (latest user message not trimmed, default 400)
+LLM_ROUTER_MAX_PREV_USER_LENGTH=400
 # Enable router multimodal fallback (set to true to allow image inputs via router)
 LLM_ROUTER_ENABLE_MULTIMODAL=false

src/lib/server/router/arch.ts CHANGED Viewed

@@ -6,6 +6,41 @@ import { getRoutes } from "./policy";
 import { getApiToken } from "$lib/server/apiToken";
 const DEFAULT_LAST_TURNS = 16;
 const PROMPT_TEMPLATE = `
 You are a helpful assistant designed to find the best suited route.
 You are provided with route description within <routes></routes> XML tags:
@@ -43,12 +78,43 @@ function toRouterPrompt(messages: EndpointMessage[], routes: Route[]) {
 		name: r.name,
 		description: r.description,
 	}));
 	const convo = messages
 		.map((m) => ({ role: m.from, content: m.content }))
 		.filter((m) => typeof m.content === "string" && m.content.trim() !== "");
 	return PROMPT_TEMPLATE.replace("{routes}", JSON.stringify(simpleRoutes)).replace(
 		"{conversation}",
-		JSON.stringify(lastNTurns(convo))
 	);
 }

 import { getApiToken } from "$lib/server/apiToken";
 const DEFAULT_LAST_TURNS = 16;
/**
 * Shorten a message to at most `maxLength` characters by keeping its start and
 * end and replacing the middle with a single ellipsis character ("…").
 *
 * Roughly 60% of the kept characters come from the start and the remainder
 * from the end. When the limit is too small to hold both parts, the result
 * degrades to "start + ellipsis", and finally to a plain hard truncation.
 *
 * The limit is normalised before use so that a misconfigured value can never
 * make the result longer than the input:
 * - `NaN` (e.g. `parseInt` of a malformed setting) means "no usable limit" and
 *   the content is returned unchanged;
 * - negative values are treated as 0;
 * - fractional values are rounded down.
 *
 * @param content - The message content to trim
 * @param maxLength - Maximum total length in UTF-16 code units (including the indicator)
 * @returns `content` unchanged if it already fits, otherwise the trimmed text
 */
function trimMiddle(content: string, maxLength: number): string {
	// Without this guard every comparison below is false for NaN and the function
	// would return indicator + the entire content.
	if (Number.isNaN(maxLength)) return content;

	// Clamp to a non-negative integer. Negative or fractional numbers must not reach
	// String.prototype.slice: negative indices count from the end of the string, and
	// a value such as -0.5 truncates to 0, i.e. "the whole string".
	const limit = Math.max(0, Math.floor(maxLength));
	if (content.length <= limit) return content;

	const indicator = "…";
	const availableLength = limit - indicator.length;
	if (availableLength <= 0) {
		// No room for anything besides (at most) the indicator: hard truncate
		return content.slice(0, limit);
	}

	// Reserve more space for the start (typically contains context)
	const startLength = Math.ceil(availableLength * 0.6);
	const endLength = availableLength - startLength;
	if (endLength <= 0) {
		// slice(-0) would return the entire string, so skip the tail completely
		return content.slice(0, availableLength) + indicator;
	}

	return content.slice(0, startLength) + indicator + content.slice(-endLength);
}
 const PROMPT_TEMPLATE = `
 You are a helpful assistant designed to find the best suited route.
 You are provided with route description within <routes></routes> XML tags:
 		name: r.name,
 		description: r.description,
 	}));
+	const maxAssistantLength = parseInt(config.LLM_ROUTER_MAX_ASSISTANT_LENGTH || "500", 10);
+	const maxPrevUserLength = parseInt(config.LLM_ROUTER_MAX_PREV_USER_LENGTH || "400", 10);
 	const convo = messages
 		.map((m) => ({ role: m.from, content: m.content }))
 		.filter((m) => typeof m.content === "string" && m.content.trim() !== "");
+	// Find the last user message index to preserve its full content
+	const lastUserIndex = convo.findLastIndex((m) => m.role === "user");
+	const trimmedConvo = convo.map((m, idx) => {
+		if (typeof m.content !== "string") return m;
+		// Trim assistant messages to reduce routing prompt size and improve latency
+		// Keep start and end for better context understanding
+		if (m.role === "assistant") {
+			return {
+				...m,
+				content: trimMiddle(m.content, maxAssistantLength),
+			};
+		}
+		// Trim previous user messages, but keep the latest user message full
+		// Keep start and end to preserve both context and question
+		if (m.role === "user" && idx !== lastUserIndex) {
+			return {
+				...m,
+				content: trimMiddle(m.content, maxPrevUserLength),
+			};
+		}
+		return m;
+	});
 	return PROMPT_TEMPLATE.replace("{routes}", JSON.stringify(simpleRoutes)).replace(
 		"{conversation}",
+		JSON.stringify(lastNTurns(trimmedConvo))
 	);
 }