Deploy qmd-web
Browse files
- assets/{index-BYk7Tddz.js → index-h12WNjfQ.js} +0 -0
- index.html +1 -1
- src/components/QueryInput.tsx +3 -0
- src/pipeline/expansion.test.ts +26 -0
- src/pipeline/expansion.ts +19 -14
- src/pipeline/orchestrator.test.ts +43 -0
assets/{index-BYk7Tddz.js → index-h12WNjfQ.js}
RENAMED
|
The diff for this file is too large to render.
See raw diff
|
|
|
index.html
CHANGED
|
@@ -87,7 +87,7 @@
|
|
| 87 |
}
|
| 88 |
})();
|
| 89 |
</script>
|
| 90 |
-
<script type="module" crossorigin src="./assets/index-BYk7Tddz.js"></script>
|
| 91 |
</head>
|
| 92 |
<body>
|
| 93 |
<div id="root"></div>
|
|
|
|
| 87 |
}
|
| 88 |
})();
|
| 89 |
</script>
|
| 90 |
+
<script type="module" crossorigin src="./assets/index-h12WNjfQ.js"></script>
|
| 91 |
</head>
|
| 92 |
<body>
|
| 93 |
<div id="root"></div>
|
src/components/QueryInput.tsx
CHANGED
|
@@ -22,6 +22,9 @@ export default function QueryInput({ onSearch, disabled }: QueryInputProps) {
|
|
| 22 |
if (exIntent) {
|
| 23 |
setIntent(exIntent);
|
| 24 |
setShowIntent(true);
|
|
|
|
|
|
|
|
|
|
| 25 |
}
|
| 26 |
onSearch(q, exIntent);
|
| 27 |
}
|
|
|
|
| 22 |
if (exIntent) {
|
| 23 |
setIntent(exIntent);
|
| 24 |
setShowIntent(true);
|
| 25 |
+
} else {
|
| 26 |
+
setIntent('');
|
| 27 |
+
setShowIntent(false);
|
| 28 |
}
|
| 29 |
onSearch(q, exIntent);
|
| 30 |
}
|
src/pipeline/expansion.test.ts
CHANGED
|
@@ -114,6 +114,32 @@ describe("parseExpansionOutput", () => {
|
|
| 114 |
expect(result.vec).toEqual([QUERY]);
|
| 115 |
expect(result.hyde).toBe("test query overview with useful detail");
|
| 116 |
});
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 117 |
});
|
| 118 |
|
| 119 |
describe("sampleToken", () => {
|
|
|
|
| 114 |
expect(result.vec).toEqual([QUERY]);
|
| 115 |
expect(result.hyde).toBe("test query overview with useful detail");
|
| 116 |
});
|
| 117 |
+
|
| 118 |
+
it("accepts expansions matching intent terms even when query terms are absent", () => {
|
| 119 |
+
const text = [
|
| 120 |
+
"lex: core web vitals optimization",
|
| 121 |
+
"vec: how to reduce page load time for web apps",
|
| 122 |
+
"hyde: A guide to improving page load times and web vitals",
|
| 123 |
+
].join("\n");
|
| 124 |
+
|
| 125 |
+
const result = parseExpansionOutput(text, "performance", "web page load times");
|
| 126 |
+
expect(result.lex).toBe("core web vitals optimization");
|
| 127 |
+
expect(result.vec).toEqual(["how to reduce page load time for web apps"]);
|
| 128 |
+
expect(result.hyde).toBe("A guide to improving page load times and web vitals");
|
| 129 |
+
});
|
| 130 |
+
|
| 131 |
+
it("still filters lines that match neither query nor intent", () => {
|
| 132 |
+
const text = [
|
| 133 |
+
"lex: unrelated cooking recipe",
|
| 134 |
+
"vec: how to bake sourdough bread",
|
| 135 |
+
"hyde: A guide to improving page load times",
|
| 136 |
+
].join("\n");
|
| 137 |
+
|
| 138 |
+
const result = parseExpansionOutput(text, "performance", "web page load times");
|
| 139 |
+
expect(result.lex).toBe("performance");
|
| 140 |
+
expect(result.vec).toEqual(["performance"]);
|
| 141 |
+
expect(result.hyde).toBe("A guide to improving page load times");
|
| 142 |
+
});
|
| 143 |
});
|
| 144 |
|
| 145 |
describe("sampleToken", () => {
|
src/pipeline/expansion.ts
CHANGED
|
@@ -20,17 +20,22 @@ const TEMPERATURE = 0.7;
|
|
| 20 |
const TOP_K = 20;
|
| 21 |
const TOP_P = 0.8;
|
| 22 |
|
| 23 |
-
// Parse the model's output text into structured ExpandedQuery
|
| 24 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 25 |
const lines = text.trim().split("\n");
|
| 26 |
-
const
|
| 27 |
-
.toLowerCase()
|
| 28 |
-
|
| 29 |
-
.
|
| 30 |
-
.
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
|
|
|
| 34 |
let lex = "";
|
| 35 |
const vec: string[] = [];
|
| 36 |
let hyde = "";
|
|
@@ -39,13 +44,13 @@ function parseExpansionOutput(text: string, query: string): ExpandedQuery {
|
|
| 39 |
const trimmed = line.trim();
|
| 40 |
if (trimmed.toLowerCase().startsWith("lex:")) {
|
| 41 |
const value = trimmed.slice(4).trim();
|
| 42 |
-
if (value &&
|
| 43 |
} else if (trimmed.toLowerCase().startsWith("vec:")) {
|
| 44 |
const value = trimmed.slice(4).trim();
|
| 45 |
-
if (value &&
|
| 46 |
} else if (trimmed.toLowerCase().startsWith("hyde:")) {
|
| 47 |
const value = trimmed.slice(5).trim();
|
| 48 |
-
if (value &&
|
| 49 |
}
|
| 50 |
}
|
| 51 |
|
|
@@ -230,7 +235,7 @@ export async function expandQuery(query: string, intent?: string): Promise<Expan
|
|
| 230 |
// Strip <think>...</think> blocks (model may emit reasoning despite /no_think)
|
| 231 |
responseText = responseText.replace(/<think>[\s\S]*?<\/think>/g, "").trim();
|
| 232 |
|
| 233 |
-
return parseExpansionOutput(responseText, query);
|
| 234 |
}
|
| 235 |
|
| 236 |
export { manualGenerate, parseExpansionOutput, sampleToken }; // export for testing
|
|
|
|
| 20 |
const TOP_K = 20;
|
| 21 |
const TOP_P = 0.8;
|
| 22 |
|
| 23 |
+
// Parse the model's output text into structured ExpandedQuery.
|
| 24 |
+
// Drift filter: lines must contain at least one term from query OR intent
|
| 25 |
+
// to avoid hallucinated expansions. When intent is present, the union of
|
| 26 |
+
// query + intent terms is used, so "core web vitals" passes for
|
| 27 |
+
// query="performance" + intent="web page load times".
|
| 28 |
+
function parseExpansionOutput(text: string, query: string, intent?: string): ExpandedQuery {
|
| 29 |
const lines = text.trim().split("\n");
|
| 30 |
+
const extractTerms = (s: string) =>
|
| 31 |
+
s.toLowerCase().replace(/[^a-z0-9\s]/g, " ").split(/\s+/).filter(Boolean);
|
| 32 |
+
const allTerms = [
|
| 33 |
+
...extractTerms(query),
|
| 34 |
+
...(intent ? extractTerms(intent) : []),
|
| 35 |
+
];
|
| 36 |
+
const hasRelevantTerm = (value: string) =>
|
| 37 |
+
allTerms.length === 0 ||
|
| 38 |
+
allTerms.some((term) => value.toLowerCase().includes(term));
|
| 39 |
let lex = "";
|
| 40 |
const vec: string[] = [];
|
| 41 |
let hyde = "";
|
|
|
|
| 44 |
const trimmed = line.trim();
|
| 45 |
if (trimmed.toLowerCase().startsWith("lex:")) {
|
| 46 |
const value = trimmed.slice(4).trim();
|
| 47 |
+
if (value && hasRelevantTerm(value)) lex = value;
|
| 48 |
} else if (trimmed.toLowerCase().startsWith("vec:")) {
|
| 49 |
const value = trimmed.slice(4).trim();
|
| 50 |
+
if (value && hasRelevantTerm(value)) vec.push(value);
|
| 51 |
} else if (trimmed.toLowerCase().startsWith("hyde:")) {
|
| 52 |
const value = trimmed.slice(5).trim();
|
| 53 |
+
if (value && hasRelevantTerm(value)) hyde = value;
|
| 54 |
}
|
| 55 |
}
|
| 56 |
|
|
|
|
| 235 |
// Strip <think>...</think> blocks (model may emit reasoning despite /no_think)
|
| 236 |
responseText = responseText.replace(/<think>[\s\S]*?<\/think>/g, "").trim();
|
| 237 |
|
| 238 |
+
return parseExpansionOutput(responseText, query, intent);
|
| 239 |
}
|
| 240 |
|
| 241 |
export { manualGenerate, parseExpansionOutput, sampleToken }; // export for testing
|
src/pipeline/orchestrator.test.ts
CHANGED
|
@@ -1,6 +1,7 @@
|
|
| 1 |
import { describe, expect, it } from "vitest";
|
| 2 |
import type { EmbeddedChunk, ScoredChunk } from "../types";
|
| 3 |
import {
|
|
|
|
| 4 |
extractQueryTerms,
|
| 5 |
hasStrongBm25Signal,
|
| 6 |
normalizeBm25Score,
|
|
@@ -108,4 +109,46 @@ describe("selectBestChunkForRerank", () => {
|
|
| 108 |
it("returns an empty string for empty chunk lists", () => {
|
| 109 |
expect(selectBestChunkForRerank("query", [])).toBe("");
|
| 110 |
});
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 111 |
});
|
|
|
|
| 1 |
import { describe, expect, it } from "vitest";
|
| 2 |
import type { EmbeddedChunk, ScoredChunk } from "../types";
|
| 3 |
import {
|
| 4 |
+
extractIntentTerms,
|
| 5 |
extractQueryTerms,
|
| 6 |
hasStrongBm25Signal,
|
| 7 |
normalizeBm25Score,
|
|
|
|
| 109 |
it("returns an empty string for empty chunk lists", () => {
|
| 110 |
expect(selectBestChunkForRerank("query", [])).toBe("");
|
| 111 |
});
|
| 112 |
+
|
| 113 |
+
it("boosts chunk selection using intent terms at 0.5x weight", () => {
|
| 114 |
+
const chunks = [
|
| 115 |
+
makeEmbeddedChunk("General performance tuning tips for servers", 0),
|
| 116 |
+
makeEmbeddedChunk("Core web vitals and page load optimization", 1),
|
| 117 |
+
makeEmbeddedChunk("CPU benchmarking guide for gaming rigs", 2),
|
| 118 |
+
];
|
| 119 |
+
|
| 120 |
+
// Without intent, "performance" alone matches chunk 0 best
|
| 121 |
+
expect(selectBestChunkForRerank("performance", chunks)).toBe(
|
| 122 |
+
"General performance tuning tips for servers",
|
| 123 |
+
);
|
| 124 |
+
|
| 125 |
+
// With intent, chunk 1 wins via intent terms "web", "page", "load"
|
| 126 |
+
expect(selectBestChunkForRerank("performance", chunks, "web page load times")).toBe(
|
| 127 |
+
"Core web vitals and page load optimization",
|
| 128 |
+
);
|
| 129 |
+
});
|
| 130 |
+
});
|
| 131 |
+
|
| 132 |
+
describe("extractIntentTerms", () => {
|
| 133 |
+
it("filters stop words and preserves domain terms", () => {
|
| 134 |
+
const terms = extractIntentTerms("looking for API performance in the database");
|
| 135 |
+
expect(terms).toContain("api");
|
| 136 |
+
expect(terms).toContain("performance");
|
| 137 |
+
expect(terms).toContain("database");
|
| 138 |
+
expect(terms).not.toContain("looking");
|
| 139 |
+
expect(terms).not.toContain("for");
|
| 140 |
+
expect(terms).not.toContain("the");
|
| 141 |
+
});
|
| 142 |
+
|
| 143 |
+
it("preserves short domain acronyms like API, SQL, CI", () => {
|
| 144 |
+
const terms = extractIntentTerms("API SQL CI CD");
|
| 145 |
+
expect(terms).toEqual(["api", "sql", "ci", "cd"]);
|
| 146 |
+
});
|
| 147 |
+
|
| 148 |
+
it("strips Unicode punctuation", () => {
|
| 149 |
+
const terms = extractIntentTerms('"web" (vitals) —performance—');
|
| 150 |
+
expect(terms).toContain("web");
|
| 151 |
+
expect(terms).toContain("vitals");
|
| 152 |
+
expect(terms).toContain("performance");
|
| 153 |
+
});
|
| 154 |
});
|