Spaces:

shreyask
/

qmd-web

Running

App Files Community

shreyask committed on Mar 13

Commit

0ab4fc5

verified ·

1 Parent(s): 8a8f6ee

Deploy qmd-web

Browse files

Files changed (6) hide show

assets/{index-BYk7Tddz.js → index-h12WNjfQ.js} +0 -0
index.html +1 -1
src/components/QueryInput.tsx +3 -0
src/pipeline/expansion.test.ts +26 -0
src/pipeline/expansion.ts +19 -14
src/pipeline/orchestrator.test.ts +43 -0

assets/{index-BYk7Tddz.js → index-h12WNjfQ.js} RENAMED Viewed

The diff for this file is too large to render. See raw diff

index.html CHANGED Viewed

@@ -87,7 +87,7 @@
         }
       })();
     </script>
-    <script type="module" crossorigin src="./assets/index-BYk7Tddz.js"></script>
   </head>
   <body>
     <div id="root"></div>

         }
       })();
     </script>
+    <script type="module" crossorigin src="./assets/index-h12WNjfQ.js"></script>
   </head>
   <body>
     <div id="root"></div>

src/components/QueryInput.tsx CHANGED Viewed

@@ -22,6 +22,9 @@ export default function QueryInput({ onSearch, disabled }: QueryInputProps) {
     if (exIntent) {
       setIntent(exIntent);
       setShowIntent(true);
     }
     onSearch(q, exIntent);
   }

     if (exIntent) {
       setIntent(exIntent);
       setShowIntent(true);
+    } else {
+      setIntent('');
+      setShowIntent(false);
     }
     onSearch(q, exIntent);
   }

src/pipeline/expansion.test.ts CHANGED Viewed

@@ -114,6 +114,32 @@ describe("parseExpansionOutput", () => {
     expect(result.vec).toEqual([QUERY]);
     expect(result.hyde).toBe("test query overview with useful detail");
   });
 });
 describe("sampleToken", () => {

     expect(result.vec).toEqual([QUERY]);
     expect(result.hyde).toBe("test query overview with useful detail");
   });
+  it("accepts expansions matching intent terms even when query terms are absent", () => {
+    const text = [
+      "lex: core web vitals optimization",
+      "vec: how to reduce page load time for web apps",
+      "hyde: A guide to improving page load times and web vitals",
+    ].join("\n");
+    const result = parseExpansionOutput(text, "performance", "web page load times");
+    expect(result.lex).toBe("core web vitals optimization");
+    expect(result.vec).toEqual(["how to reduce page load time for web apps"]);
+    expect(result.hyde).toBe("A guide to improving page load times and web vitals");
+  });
+  it("still filters lines that match neither query nor intent", () => {
+    const text = [
+      "lex: unrelated cooking recipe",
+      "vec: how to bake sourdough bread",
+      "hyde: A guide to improving page load times",
+    ].join("\n");
+    const result = parseExpansionOutput(text, "performance", "web page load times");
+    expect(result.lex).toBe("performance");
+    expect(result.vec).toEqual(["performance"]);
+    expect(result.hyde).toBe("A guide to improving page load times");
+  });
 });
 describe("sampleToken", () => {

src/pipeline/expansion.ts CHANGED Viewed

@@ -20,17 +20,22 @@ const TEMPERATURE = 0.7;
 const TOP_K = 20;
 const TOP_P = 0.8;
-// Parse the model's output text into structured ExpandedQuery
-function parseExpansionOutput(text: string, query: string): ExpandedQuery {
   const lines = text.trim().split("\n");
-  const queryTerms = query
-    .toLowerCase()
-    .replace(/[^a-z0-9\s]/g, " ")
-    .split(/\s+/)
-    .filter(Boolean);
-  const hasQueryTerm = (value: string) =>
-    queryTerms.length === 0 ||
-    queryTerms.some((term) => value.toLowerCase().includes(term));
   let lex = "";
   const vec: string[] = [];
   let hyde = "";
@@ -39,13 +44,13 @@ function parseExpansionOutput(text: string, query: string): ExpandedQuery {
     const trimmed = line.trim();
     if (trimmed.toLowerCase().startsWith("lex:")) {
       const value = trimmed.slice(4).trim();
-      if (value && hasQueryTerm(value)) lex = value;
     } else if (trimmed.toLowerCase().startsWith("vec:")) {
       const value = trimmed.slice(4).trim();
-      if (value && hasQueryTerm(value)) vec.push(value);
     } else if (trimmed.toLowerCase().startsWith("hyde:")) {
       const value = trimmed.slice(5).trim();
-      if (value && hasQueryTerm(value)) hyde = value;
     }
   }
@@ -230,7 +235,7 @@ export async function expandQuery(query: string, intent?: string): Promise<Expan
   // Strip <think>...</think> blocks (model may emit reasoning despite /no_think)
   responseText = responseText.replace(/<think>[\s\S]*?<\/think>/g, "").trim();
-  return parseExpansionOutput(responseText, query);
 }
 export { manualGenerate, parseExpansionOutput, sampleToken }; // export for testing

 const TOP_K = 20;
 const TOP_P = 0.8;
+// Parse the model's output text into structured ExpandedQuery.
+// Drift filter: lines must contain at least one term from query OR intent
+// to avoid hallucinated expansions. When intent is present, the union of
+// query + intent terms is used, so "core web vitals" passes for
+// query="performance" + intent="web page load times".
+function parseExpansionOutput(text: string, query: string, intent?: string): ExpandedQuery {
   const lines = text.trim().split("\n");
+  const extractTerms = (s: string) =>
+    s.toLowerCase().replace(/[^a-z0-9\s]/g, " ").split(/\s+/).filter(Boolean);
+  const allTerms = [
+    ...extractTerms(query),
+    ...(intent ? extractTerms(intent) : []),
+  ];
+  const hasRelevantTerm = (value: string) =>
+    allTerms.length === 0 ||
+    allTerms.some((term) => value.toLowerCase().includes(term));
   let lex = "";
   const vec: string[] = [];
   let hyde = "";
     const trimmed = line.trim();
     if (trimmed.toLowerCase().startsWith("lex:")) {
       const value = trimmed.slice(4).trim();
+      if (value && hasRelevantTerm(value)) lex = value;
     } else if (trimmed.toLowerCase().startsWith("vec:")) {
       const value = trimmed.slice(4).trim();
+      if (value && hasRelevantTerm(value)) vec.push(value);
     } else if (trimmed.toLowerCase().startsWith("hyde:")) {
       const value = trimmed.slice(5).trim();
+      if (value && hasRelevantTerm(value)) hyde = value;
     }
   }
   // Strip <think>...</think> blocks (model may emit reasoning despite /no_think)
   responseText = responseText.replace(/<think>[\s\S]*?<\/think>/g, "").trim();
+  return parseExpansionOutput(responseText, query, intent);
 }
 export { manualGenerate, parseExpansionOutput, sampleToken }; // export for testing

src/pipeline/orchestrator.test.ts CHANGED Viewed

@@ -1,6 +1,7 @@
 import { describe, expect, it } from "vitest";
 import type { EmbeddedChunk, ScoredChunk } from "../types";
 import {
   extractQueryTerms,
   hasStrongBm25Signal,
   normalizeBm25Score,
@@ -108,4 +109,46 @@ describe("selectBestChunkForRerank", () => {
   it("returns an empty string for empty chunk lists", () => {
     expect(selectBestChunkForRerank("query", [])).toBe("");
   });
 });

 import { describe, expect, it } from "vitest";
 import type { EmbeddedChunk, ScoredChunk } from "../types";
 import {
+  extractIntentTerms,
   extractQueryTerms,
   hasStrongBm25Signal,
   normalizeBm25Score,
   it("returns an empty string for empty chunk lists", () => {
     expect(selectBestChunkForRerank("query", [])).toBe("");
   });
+  it("boosts chunk selection using intent terms at 0.5x weight", () => {
+    const chunks = [
+      makeEmbeddedChunk("General performance tuning tips for servers", 0),
+      makeEmbeddedChunk("Core web vitals and page load optimization", 1),
+      makeEmbeddedChunk("CPU benchmarking guide for gaming rigs", 2),
+    ];
+    // Without intent, "performance" alone matches chunk 0 best
+    expect(selectBestChunkForRerank("performance", chunks)).toBe(
+      "General performance tuning tips for servers",
+    );
+    // With intent, chunk 1 wins via intent terms "web", "page", "load"
+    expect(selectBestChunkForRerank("performance", chunks, "web page load times")).toBe(
+      "Core web vitals and page load optimization",
+    );
+  });
+});
+describe("extractIntentTerms", () => {
+  it("filters stop words and preserves domain terms", () => {
+    const terms = extractIntentTerms("looking for API performance in the database");
+    expect(terms).toContain("api");
+    expect(terms).toContain("performance");
+    expect(terms).toContain("database");
+    expect(terms).not.toContain("looking");
+    expect(terms).not.toContain("for");
+    expect(terms).not.toContain("the");
+  });
+  it("preserves short domain acronyms like API, SQL, CI", () => {
+    const terms = extractIntentTerms("API SQL CI CD");
+    expect(terms).toEqual(["api", "sql", "ci", "cd"]);
+  });
+  it("strips Unicode punctuation", () => {
+    const terms = extractIntentTerms('"web" (vitals) —performance—');
+    expect(terms).toContain("web");
+    expect(terms).toContain("vitals");
+    expect(terms).toContain("performance");
+  });
 });