victor HF Staff committed on
Commit
10f34e5
·
unverified ·
1 Parent(s): b33184c

Add message trimming for router prompt construction (#1935)

Browse files

Introduces configurable maximum lengths for assistant and previous user messages in the router prompt via new .env variables. Implements a trimMiddle function to keep the start and end of long messages, improving prompt efficiency and latency while preserving relevant context for route selection.

Files changed (2) hide show
  1. .env +4 -0
  2. src/lib/server/router/arch.ts +67 -1
.env CHANGED
@@ -62,6 +62,10 @@ LLM_ROUTER_OTHER_ROUTE=casual_conversation
62
  LLM_ROUTER_FALLBACK_MODEL=
63
  # Arch selection timeout in milliseconds (default 10000)
64
  LLM_ROUTER_ARCH_TIMEOUT_MS=10000
 
 
 
 
65
 
66
  # Enable router multimodal fallback (set to true to allow image inputs via router)
67
  LLM_ROUTER_ENABLE_MULTIMODAL=false
 
62
  LLM_ROUTER_FALLBACK_MODEL=
63
  # Arch selection timeout in milliseconds (default 10000)
64
  LLM_ROUTER_ARCH_TIMEOUT_MS=10000
65
+ # Maximum length (in characters) for assistant messages sent to the router for route selection; longer messages are trimmed in the middle (default 500)
66
+ LLM_ROUTER_MAX_ASSISTANT_LENGTH=500
67
+ # Maximum length (in characters) for earlier user messages sent to the router; the latest user message is never trimmed (default 400)
68
+ LLM_ROUTER_MAX_PREV_USER_LENGTH=400
69
 
70
  # Enable router multimodal fallback (set to true to allow image inputs via router)
71
  LLM_ROUTER_ENABLE_MULTIMODAL=false
src/lib/server/router/arch.ts CHANGED
@@ -6,6 +6,41 @@ import { getRoutes } from "./policy";
6
  import { getApiToken } from "$lib/server/apiToken";
7
 
8
  const DEFAULT_LAST_TURNS = 16;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9
  const PROMPT_TEMPLATE = `
10
  You are a helpful assistant designed to find the best suited route.
11
  You are provided with route description within <routes></routes> XML tags:
@@ -43,12 +78,43 @@ function toRouterPrompt(messages: EndpointMessage[], routes: Route[]) {
43
  name: r.name,
44
  description: r.description,
45
  }));
 
 
 
46
  const convo = messages
47
  .map((m) => ({ role: m.from, content: m.content }))
48
  .filter((m) => typeof m.content === "string" && m.content.trim() !== "");
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
49
  return PROMPT_TEMPLATE.replace("{routes}", JSON.stringify(simpleRoutes)).replace(
50
  "{conversation}",
51
- JSON.stringify(lastNTurns(convo))
52
  );
53
  }
54
 
 
6
  import { getApiToken } from "$lib/server/apiToken";
7
 
8
  const DEFAULT_LAST_TURNS = 16;
9
+
10
/**
 * Shorten a message by keeping its start and end and replacing the middle with "…".
 *
 * A plain ellipsis is enough because the router only needs context for intent
 * classification, not the exact content.
 *
 * Lengths are measured with `String.prototype.length` / `slice`, i.e. in UTF-16
 * code units.
 *
 * @param content - The message content to trim
 * @param maxLength - Maximum total length of the result (including the indicator).
 *   `NaN` means "no usable limit" and leaves the content untouched; zero or a
 *   negative value yields an empty string.
 * @returns `content` unchanged when it already fits, otherwise start + "…" + end
 *   (or a shorter fallback when the limit is too small to hold all three parts)
 */
function trimMiddle(content: string, maxLength: number): string {
	// A NaN limit (e.g. parseInt on a malformed setting) fails every comparison below
	// and would end up returning "…" + the whole message, so bail out explicitly.
	if (Number.isNaN(maxLength)) return content;

	if (content.length <= maxLength) return content;

	// slice(0, negative) counts from the end of the string and would keep almost
	// everything, so a non-positive limit must short-circuit to an empty result.
	if (maxLength <= 0) return "";

	const indicator = "…";
	const availableLength = maxLength - indicator.length;

	if (availableLength <= 0) {
		// No room for anything besides the indicator: hard truncate instead
		return content.slice(0, maxLength);
	}

	// Give the start the larger share (it typically carries the context)
	const startLength = Math.ceil(availableLength * 0.6);
	const endLength = availableLength - startLength;

	// slice(-0) is slice(0) and returns the entire string, so the tail is only
	// appended when it has a strictly positive length.
	if (endLength <= 0) {
		return content.slice(0, availableLength) + indicator;
	}

	return content.slice(0, startLength) + indicator + content.slice(-endLength);
}
43
+
44
  const PROMPT_TEMPLATE = `
45
  You are a helpful assistant designed to find the best suited route.
46
  You are provided with route description within <routes></routes> XML tags:
 
78
  name: r.name,
79
  description: r.description,
80
  }));
81
+ const maxAssistantLength = parseInt(config.LLM_ROUTER_MAX_ASSISTANT_LENGTH || "500", 10);
82
+ const maxPrevUserLength = parseInt(config.LLM_ROUTER_MAX_PREV_USER_LENGTH || "400", 10);
83
+
84
  const convo = messages
85
  .map((m) => ({ role: m.from, content: m.content }))
86
  .filter((m) => typeof m.content === "string" && m.content.trim() !== "");
87
+
88
+ // Find the last user message index to preserve its full content
89
+ const lastUserIndex = convo.findLastIndex((m) => m.role === "user");
90
+
91
+ const trimmedConvo = convo.map((m, idx) => {
92
+ if (typeof m.content !== "string") return m;
93
+
94
+ // Trim assistant messages to reduce routing prompt size and improve latency
95
+ // Keep start and end for better context understanding
96
+ if (m.role === "assistant") {
97
+ return {
98
+ ...m,
99
+ content: trimMiddle(m.content, maxAssistantLength),
100
+ };
101
+ }
102
+
103
+ // Trim previous user messages, but keep the latest user message full
104
+ // Keep start and end to preserve both context and question
105
+ if (m.role === "user" && idx !== lastUserIndex) {
106
+ return {
107
+ ...m,
108
+ content: trimMiddle(m.content, maxPrevUserLength),
109
+ };
110
+ }
111
+
112
+ return m;
113
+ });
114
+
115
  return PROMPT_TEMPLATE.replace("{routes}", JSON.stringify(simpleRoutes)).replace(
116
  "{conversation}",
117
+ JSON.stringify(lastNTurns(trimmedConvo))
118
  );
119
  }
120