shreyask committed on
Commit
0ab4fc5
·
verified ·
1 Parent(s): 8a8f6ee

Deploy qmd-web

Browse files
assets/{index-BYk7Tddz.js → index-h12WNjfQ.js} RENAMED
The diff for this file is too large to render. See raw diff
 
index.html CHANGED
@@ -87,7 +87,7 @@
87
  }
88
  })();
89
  </script>
90
- <script type="module" crossorigin src="./assets/index-BYk7Tddz.js"></script>
91
  </head>
92
  <body>
93
  <div id="root"></div>
 
87
  }
88
  })();
89
  </script>
90
+ <script type="module" crossorigin src="./assets/index-h12WNjfQ.js"></script>
91
  </head>
92
  <body>
93
  <div id="root"></div>
src/components/QueryInput.tsx CHANGED
@@ -22,6 +22,9 @@ export default function QueryInput({ onSearch, disabled }: QueryInputProps) {
22
  if (exIntent) {
23
  setIntent(exIntent);
24
  setShowIntent(true);
 
 
 
25
  }
26
  onSearch(q, exIntent);
27
  }
 
22
  if (exIntent) {
23
  setIntent(exIntent);
24
  setShowIntent(true);
25
+ } else {
26
+ setIntent('');
27
+ setShowIntent(false);
28
  }
29
  onSearch(q, exIntent);
30
  }
src/pipeline/expansion.test.ts CHANGED
@@ -114,6 +114,32 @@ describe("parseExpansionOutput", () => {
114
  expect(result.vec).toEqual([QUERY]);
115
  expect(result.hyde).toBe("test query overview with useful detail");
116
  });
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
117
  });
118
 
119
  describe("sampleToken", () => {
 
114
  expect(result.vec).toEqual([QUERY]);
115
  expect(result.hyde).toBe("test query overview with useful detail");
116
  });
117
+
118
+ it("accepts expansions matching intent terms even when query terms are absent", () => {
119
+ const text = [
120
+ "lex: core web vitals optimization",
121
+ "vec: how to reduce page load time for web apps",
122
+ "hyde: A guide to improving page load times and web vitals",
123
+ ].join("\n");
124
+
125
+ const result = parseExpansionOutput(text, "performance", "web page load times");
126
+ expect(result.lex).toBe("core web vitals optimization");
127
+ expect(result.vec).toEqual(["how to reduce page load time for web apps"]);
128
+ expect(result.hyde).toBe("A guide to improving page load times and web vitals");
129
+ });
130
+
131
+ it("still filters lines that match neither query nor intent", () => {
132
+ const text = [
133
+ "lex: unrelated cooking recipe",
134
+ "vec: how to bake sourdough bread",
135
+ "hyde: A guide to improving page load times",
136
+ ].join("\n");
137
+
138
+ const result = parseExpansionOutput(text, "performance", "web page load times");
139
+ expect(result.lex).toBe("performance");
140
+ expect(result.vec).toEqual(["performance"]);
141
+ expect(result.hyde).toBe("A guide to improving page load times");
142
+ });
143
  });
144
 
145
  describe("sampleToken", () => {
src/pipeline/expansion.ts CHANGED
@@ -20,17 +20,22 @@ const TEMPERATURE = 0.7;
20
  const TOP_K = 20;
21
  const TOP_P = 0.8;
22
 
23
- // Parse the model's output text into structured ExpandedQuery
24
- function parseExpansionOutput(text: string, query: string): ExpandedQuery {
 
 
 
 
25
  const lines = text.trim().split("\n");
26
- const queryTerms = query
27
- .toLowerCase()
28
- .replace(/[^a-z0-9\s]/g, " ")
29
- .split(/\s+/)
30
- .filter(Boolean);
31
- const hasQueryTerm = (value: string) =>
32
- queryTerms.length === 0 ||
33
- queryTerms.some((term) => value.toLowerCase().includes(term));
 
34
  let lex = "";
35
  const vec: string[] = [];
36
  let hyde = "";
@@ -39,13 +44,13 @@ function parseExpansionOutput(text: string, query: string): ExpandedQuery {
39
  const trimmed = line.trim();
40
  if (trimmed.toLowerCase().startsWith("lex:")) {
41
  const value = trimmed.slice(4).trim();
42
- if (value && hasQueryTerm(value)) lex = value;
43
  } else if (trimmed.toLowerCase().startsWith("vec:")) {
44
  const value = trimmed.slice(4).trim();
45
- if (value && hasQueryTerm(value)) vec.push(value);
46
  } else if (trimmed.toLowerCase().startsWith("hyde:")) {
47
  const value = trimmed.slice(5).trim();
48
- if (value && hasQueryTerm(value)) hyde = value;
49
  }
50
  }
51
 
@@ -230,7 +235,7 @@ export async function expandQuery(query: string, intent?: string): Promise<Expan
230
  // Strip <think>...</think> blocks (model may emit reasoning despite /no_think)
231
  responseText = responseText.replace(/<think>[\s\S]*?<\/think>/g, "").trim();
232
 
233
- return parseExpansionOutput(responseText, query);
234
  }
235
 
236
  export { manualGenerate, parseExpansionOutput, sampleToken }; // export for testing
 
20
  const TOP_K = 20;
21
  const TOP_P = 0.8;
22
 
23
+ // Parse the model's output text into structured ExpandedQuery.
24
+ // Drift filter: lines must contain at least one term from query OR intent
25
+ // to avoid hallucinated expansions. When intent is present, the union of
26
+ // query + intent terms is used, so "core web vitals" passes for
27
+ // query="performance" + intent="web page load times".
28
+ function parseExpansionOutput(text: string, query: string, intent?: string): ExpandedQuery {
29
  const lines = text.trim().split("\n");
30
+ const extractTerms = (s: string) =>
31
+ s.toLowerCase().replace(/[^a-z0-9\s]/g, " ").split(/\s+/).filter(Boolean);
32
+ const allTerms = [
33
+ ...extractTerms(query),
34
+ ...(intent ? extractTerms(intent) : []),
35
+ ];
36
+ const hasRelevantTerm = (value: string) =>
37
+ allTerms.length === 0 ||
38
+ allTerms.some((term) => value.toLowerCase().includes(term));
39
  let lex = "";
40
  const vec: string[] = [];
41
  let hyde = "";
 
44
  const trimmed = line.trim();
45
  if (trimmed.toLowerCase().startsWith("lex:")) {
46
  const value = trimmed.slice(4).trim();
47
+ if (value && hasRelevantTerm(value)) lex = value;
48
  } else if (trimmed.toLowerCase().startsWith("vec:")) {
49
  const value = trimmed.slice(4).trim();
50
+ if (value && hasRelevantTerm(value)) vec.push(value);
51
  } else if (trimmed.toLowerCase().startsWith("hyde:")) {
52
  const value = trimmed.slice(5).trim();
53
+ if (value && hasRelevantTerm(value)) hyde = value;
54
  }
55
  }
56
 
 
235
  // Strip <think>...</think> blocks (model may emit reasoning despite /no_think)
236
  responseText = responseText.replace(/<think>[\s\S]*?<\/think>/g, "").trim();
237
 
238
+ return parseExpansionOutput(responseText, query, intent);
239
  }
240
 
241
  export { manualGenerate, parseExpansionOutput, sampleToken }; // export for testing
src/pipeline/orchestrator.test.ts CHANGED
@@ -1,6 +1,7 @@
1
  import { describe, expect, it } from "vitest";
2
  import type { EmbeddedChunk, ScoredChunk } from "../types";
3
  import {
 
4
  extractQueryTerms,
5
  hasStrongBm25Signal,
6
  normalizeBm25Score,
@@ -108,4 +109,46 @@ describe("selectBestChunkForRerank", () => {
108
  it("returns an empty string for empty chunk lists", () => {
109
  expect(selectBestChunkForRerank("query", [])).toBe("");
110
  });
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
111
  });
 
1
  import { describe, expect, it } from "vitest";
2
  import type { EmbeddedChunk, ScoredChunk } from "../types";
3
  import {
4
+ extractIntentTerms,
5
  extractQueryTerms,
6
  hasStrongBm25Signal,
7
  normalizeBm25Score,
 
109
  it("returns an empty string for empty chunk lists", () => {
110
  expect(selectBestChunkForRerank("query", [])).toBe("");
111
  });
112
+
113
+ it("boosts chunk selection using intent terms at 0.5x weight", () => {
114
+ const chunks = [
115
+ makeEmbeddedChunk("General performance tuning tips for servers", 0),
116
+ makeEmbeddedChunk("Core web vitals and page load optimization", 1),
117
+ makeEmbeddedChunk("CPU benchmarking guide for gaming rigs", 2),
118
+ ];
119
+
120
+ // Without intent, "performance" alone matches chunk 0 best
121
+ expect(selectBestChunkForRerank("performance", chunks)).toBe(
122
+ "General performance tuning tips for servers",
123
+ );
124
+
125
+ // With intent, chunk 1 wins via intent terms "web", "page", "load"
126
+ expect(selectBestChunkForRerank("performance", chunks, "web page load times")).toBe(
127
+ "Core web vitals and page load optimization",
128
+ );
129
+ });
130
+ });
131
+
132
+ describe("extractIntentTerms", () => {
133
+ it("filters stop words and preserves domain terms", () => {
134
+ const terms = extractIntentTerms("looking for API performance in the database");
135
+ expect(terms).toContain("api");
136
+ expect(terms).toContain("performance");
137
+ expect(terms).toContain("database");
138
+ expect(terms).not.toContain("looking");
139
+ expect(terms).not.toContain("for");
140
+ expect(terms).not.toContain("the");
141
+ });
142
+
143
+ it("preserves short domain acronyms like API, SQL, CI", () => {
144
+ const terms = extractIntentTerms("API SQL CI CD");
145
+ expect(terms).toEqual(["api", "sql", "ci", "cd"]);
146
+ });
147
+
148
+ it("strips Unicode punctuation", () => {
149
+ const terms = extractIntentTerms('"web" (vitals) —performance—');
150
+ expect(terms).toContain("web");
151
+ expect(terms).toContain("vitals");
152
+ expect(terms).toContain("performance");
153
+ });
154
  });