add types and constants ported from QMD pipeline
Browse files
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
- src/constants.ts +43 -0
- src/types.ts +100 -0
src/constants.ts
ADDED
|
@@ -0,0 +1,43 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// Chunking

/** Target chunk size, in tokens. */
export const CHUNK_SIZE_TOKENS = 900;

/** Overlap between chunks, in tokens (15% of CHUNK_SIZE_TOKENS). */
export const CHUNK_OVERLAP_TOKENS = 135;

/** Target chunk size, in characters (CHUNK_SIZE_TOKENS at ~4 chars/token). */
export const CHUNK_SIZE_CHARS = 3600;

/** Overlap between chunks, in characters (CHUNK_OVERLAP_TOKENS at ~4 chars/token). */
export const CHUNK_OVERLAP_CHARS = 540;
|
| 6 |
+
|
| 7 |
+
// RRF (rank fusion) parameters.
// NOTE(review): how each value is applied is defined by the fusion code, which
// is not visible here; the descriptions below are assumptions to confirm.

/** Smoothing constant; presumably the `k` in `1 / (k + rank)`. */
export const RRF_K = 60;

/** Weight applied to the primary ranked list(s). */
export const RRF_PRIMARY_WEIGHT = 2.0;

/** Weight applied to secondary ranked list(s). */
export const RRF_SECONDARY_WEIGHT = 1.0;

/** Bonus for a top-ranked hit; presumably additive — confirm which ranks qualify. */
export const RRF_RANK1_BONUS = 0.05;

/** Smaller bonus for the next rank tier; presumably additive — confirm which ranks qualify. */
export const RRF_RANK2_BONUS = 0.02;
|
| 13 |
+
|
| 14 |
+
// Score Blending
// NOTE(review): presumably each value is the share of the blended score taken
// from the RRF score (the remainder coming from the reranker) for a rank tier;
// the exact tier boundaries live in the blending code — confirm there.

/** RRF share for the top-3 tier. */
export const BLEND_TOP3_RRF = 0.75;

/** RRF share for the middle tier. */
export const BLEND_MID_RRF = 0.6;

/** RRF share for the tail tier. */
export const BLEND_TAIL_RRF = 0.4;
|
| 18 |
+
|
| 19 |
+
// BM25

/** BM25 `k1` parameter (conventionally: term-frequency saturation). */
export const BM25_K1 = 1.2;

/** BM25 `b` parameter (conventionally: document-length normalization). */
export const BM25_B = 0.75;
|
| 22 |
+
|
| 23 |
+
// Search

/** Limit on rerank candidates; presumably the number of fused results passed to the reranker — confirm. */
export const RERANK_CANDIDATE_LIMIT = 40;

/** Token budget for reranker context; presumably per reranker input — confirm. */
export const RERANK_CONTEXT_TOKENS = 2048;
|
| 26 |
+
|
| 27 |
+
// Embedding templates (embeddinggemma format)

/**
 * Wraps a search query in the prompt prefix used when embedding queries.
 *
 * @param query - Raw query text; interpolated verbatim.
 * @returns The string `task: search result | query: <query>`.
 */
export const EMBED_QUERY_TEMPLATE = (query: string): string =>
  `task: search result | query: ${query}`;
|
| 30 |
+
/**
 * Wraps a document (or chunk) in the prompt prefix used when embedding
 * documents.
 *
 * The embeddinggemma document prompt expects the literal `none` in the title
 * slot when no title is available, so an empty or whitespace-only `title` is
 * replaced with `none` rather than emitting `title:  | text: ...`.
 *
 * @param title - Document title; empty/whitespace-only is treated as missing.
 * @param body - Text to embed; interpolated verbatim.
 * @returns The string `title: <title or "none"> | text: <body>`.
 */
export const EMBED_DOC_TEMPLATE = (title: string, body: string): string =>
  `title: ${title.trim().length > 0 ? title : "none"} | text: ${body}`;
|
| 32 |
+
|
| 33 |
+
// Model IDs for Transformers.js

/** Model ID of the embedding model. */
export const MODEL_EMBEDDING = "onnx-community/embeddinggemma-300m-ONNX";

/** Model ID of the reranker model. */
export const MODEL_RERANKER = "onnx-community/Qwen3-Reranker-0.6B-ONNX";

/** Model ID of the query-expansion model. Marked as a placeholder by the author. */
export const MODEL_EXPANSION = "shreyask/qmd-query-expansion-1.7B-ONNX"; // placeholder
|
| 37 |
+
|
| 38 |
+
/** Example queries. */
export const EXAMPLE_QUERIES = [
  "API versioning best practices",
  "distributed consensus algorithms",
  "gradient descent optimization",
];
|
src/types.ts
ADDED
|
@@ -0,0 +1,100 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/**
 * A document loaded into the demo.
 *
 * NOTE(review): inside any module that imports this type, the name shadows the
 * DOM global `Document` — confirm that is acceptable for UI code.
 */
export interface Document {
  /** Identifier; filename-based. */
  id: string;
  /** Title; extracted from the H1 or the filename. */
  title: string;
  /** Full text content. */
  body: string;
  /** File path, e.g. "api-design-principles.md". */
  filepath: string;
}
|
| 8 |
+
|
| 9 |
+
/** A chunk of a document. */
export interface Chunk {
  /** Document identifier; presumably the parent `Document.id` — confirm. */
  docId: string;
  /** Index of this chunk; presumably its position within the parent document — confirm base. */
  chunkIndex: number;
  /** Chunk text. */
  text: string;
  /** Offset of the chunk in the original document. */
  startChar: number;
  /** Title of the parent document. */
  title: string;
}
|
| 17 |
+
|
| 18 |
+
/** A chunk together with its embedding vector. */
export interface EmbeddedChunk extends Chunk {
  /** Embedding vector for the chunk. */
  embedding: Float32Array;
}
|
| 22 |
+
|
| 23 |
+
/** A search result from BM25 or vector search. */
export interface ScoredChunk {
  /** The matched chunk. */
  chunk: Chunk;
  /** Score assigned by the search; scale is not defined here. */
  score: number;
  /** Which search produced this hit. */
  source: "bm25" | "vector";
}
|
| 29 |
+
|
| 30 |
+
/** A result after RRF fusion. */
export interface RRFResult {
  /** Document identifier. */
  docId: string;
  /** Document file path. */
  filepath: string;
  /** Document title. */
  title: string;
  /** Best chunk for this document; presumably the chunk's text — confirm. */
  bestChunk: string;
  /** Fused score. */
  score: number;
  /** Per-list contributions recorded for this result. */
  contributions: RRFContribution[];
}
|
| 39 |
+
|
| 40 |
+
/** One contribution recorded on an `RRFResult` (see its `contributions` array). */
export interface RRFContribution {
  /** Which search produced the contributing hit. */
  source: "bm25" | "vector";
  /** Kind of query that produced the hit. */
  queryType: "original" | "lex" | "vec" | "hyde";
  /** The query text. */
  query: string;
  /** Rank of the hit in its list — NOTE(review): 0- or 1-based is not stated; confirm. */
  rank: number;
  /** Weight applied to this contribution. */
  weight: number;
  /** The contribution's RRF value; presumably its share of the fused score — confirm. */
  rrfContribution: number;
}
|
| 48 |
+
|
| 49 |
+
/** A result after reranking. */
export interface RerankedResult extends RRFResult {
  /** Score from the reranker. */
  rerankScore: number;
  /** Blended score; presumably a mix of the RRF and rerank scores — confirm. */
  blendedScore: number;
}
|
| 54 |
+
|
| 55 |
+
/** A final result. */
export interface FinalResult {
  /** Document file path. */
  filepath: string;
  /** Document title. */
  title: string;
  /** Best chunk for this document; presumably the chunk's text — confirm. */
  bestChunk: string;
  /** Blended score. */
  score: number;
  /** Document identifier. */
  docId: string;
}
|
| 63 |
+
|
| 64 |
+
/** Query expansion output. */
export interface ExpandedQuery {
  /** Hypothetical document snippet. */
  hyde: string;
  /** Dense retrieval sentences. */
  vec: string[];
  /** BM25 keywords. */
  lex: string;
}
|
| 70 |
+
|
| 71 |
+
// Pipeline events for React UI

/** Names of the pipeline stages. */
export type PipelineStage = "expansion" | "search" | "rrf" | "rerank" | "blend";

/** Possible statuses of a pipeline stage. */
export type PipelineStatus = "idle" | "running" | "done" | "error";
|
| 74 |
+
|
| 75 |
+
/**
 * An event describing a pipeline stage transition, discriminated by the
 * (`stage`, `status`) pair. "done" events carry that stage's output in `data`.
 *
 * NOTE(review): only "expansion" has an "error" variant, and "rrf" / "blend"
 * have no "running" variant. Confirm this asymmetry is intended before relying
 * on exhaustive handling of every stage/status combination.
 */
export type PipelineEvent =
  | { stage: "expansion"; status: "running" }
  | { stage: "expansion"; status: "done"; data: ExpandedQuery }
  | { stage: "expansion"; status: "error"; error: string }
  | { stage: "search"; status: "running" }
  | {
      stage: "search";
      status: "done";
      data: { bm25Hits: ScoredChunk[]; vectorHits: ScoredChunk[] };
    }
  | { stage: "rrf"; status: "done"; data: { merged: RRFResult[] } }
  | { stage: "rerank"; status: "running" }
  | {
      stage: "rerank";
      status: "done";
      data: { before: RRFResult[]; after: RerankedResult[] };
    }
  | { stage: "blend"; status: "done"; data: { finalResults: FinalResult[] } };
|
| 93 |
+
|
| 94 |
+
/** Model loading state. */
export interface ModelState {
  /** Model name. */
  name: string;
  /** Current loading status. */
  status: "pending" | "downloading" | "loading" | "ready" | "error";
  /** Progress as a fraction in the range 0-1. */
  progress: number;
  /** Optional error message; presumably set when `status` is "error" — confirm. */
  error?: string;
}
|