// qmd-web / src/constants.ts
// Last commit: "Deploy qmd-web" by shreyask (8a8f6ee, verified)
// Chunking
// The token constants are the source of truth. The overlap and the character
// budgets are derived from them so the four values cannot drift apart when
// one of them is tuned.

/** Rough characters-per-token ratio used to turn token budgets into character budgets. */
const CHARS_PER_TOKEN = 4;
/** Chunk overlap expressed as a fraction of the chunk size (15%). */
const CHUNK_OVERLAP_RATIO = 0.15;

/** Chunk size, in tokens. */
export const CHUNK_SIZE_TOKENS = 900;
/** Chunk overlap, in tokens: 15% of the chunk size (135). Rounded so the result is always a whole token count. */
export const CHUNK_OVERLAP_TOKENS = Math.round(CHUNK_SIZE_TOKENS * CHUNK_OVERLAP_RATIO);
/** Chunk size, in characters, at ~4 chars/token (3600). */
export const CHUNK_SIZE_CHARS = CHUNK_SIZE_TOKENS * CHARS_PER_TOKEN;
/** Chunk overlap, in characters, at ~4 chars/token (540). */
export const CHUNK_OVERLAP_CHARS = CHUNK_OVERLAP_TOKENS * CHARS_PER_TOKEN;
// RRF
// Tuning constants for rank fusion.
// NOTE(review): "RRF" presumably stands for Reciprocal Rank Fusion. The fusion
// code is not in this file, so the per-constant notes below are read off the
// names and should be confirmed against the code that consumes them.
/** The fusion constant `k`. Presumably the `k` in a `1 / (k + rank)` term — confirm. */
export const RRF_K = 60;
/** Weight for the "primary" input; twice the secondary weight. Which ranked list counts as primary is decided by the caller. */
export const RRF_PRIMARY_WEIGHT = 2.0;
/** Weight for "secondary" inputs. */
export const RRF_SECONDARY_WEIGHT = 1.0;
/** Bonus tied to rank 1. Presumably added to the fused score of a top-ranked hit — confirm. */
export const RRF_RANK1_BONUS = 0.05;
/** Bonus tied to rank 2; smaller than the rank-1 bonus. */
export const RRF_RANK2_BONUS = 0.02;
// Strong lexical match detection
// NOTE(review): presumably a lexical result counts as a "strong signal" when
// its score reaches MIN_SCORE and it leads the next result by at least
// MIN_GAP. The detection code is not in this file — confirm there.
/** Minimum score threshold for a strong lexical match. */
export const STRONG_SIGNAL_MIN_SCORE = 0.85;
/** Minimum gap threshold for a strong lexical match. */
export const STRONG_SIGNAL_MIN_GAP = 0.15;
// Position-aware blending
// The RRF weight shrinks from the top-3 tier to the top-10 tier to the tail.
// NOTE(review): presumably the remainder (1 - weight) goes to the other score
// being blended (likely the reranker score) — confirm in the blending code.
/** RRF weight for the "top 3" tier. */
export const BLEND_TOP3_RRF_WEIGHT = 0.75;
/** RRF weight for the "top 10" tier. */
export const BLEND_TOP10_RRF_WEIGHT = 0.65;
/** RRF weight for the tail (everything after the top-10 tier). */
export const BLEND_TAIL_RRF_WEIGHT = 0.5;
// Intent — matches qmd's weights from store.ts
/**
 * Intent weight at the chunk level.
 * NOTE(review): presumably scales how much the optional query intent
 * influences chunk scoring/selection — confirm in the code that reads it.
 */
export const INTENT_WEIGHT_CHUNK = 0.5;
// Stop words that are dropped from intent strings ahead of tokenization.
// Ported from qmd's INTENT_STOP_WORDS (store.ts).
/**
 * Lower-case stop words, listed by word length. The groups are flattened in
 * order, so the Set's iteration order is the order in which the words appear
 * below.
 */
export const INTENT_STOP_WORDS = new Set<string>(
  [
    // two letters
    [
      "am", "an", "as", "at", "be", "by", "do", "he", "if", "in", "is", "it",
      "me", "my", "no", "of", "on", "or", "so", "to", "up", "us", "we",
    ],
    // three letters
    [
      "all", "and", "any", "are", "but", "can", "did", "for", "get", "has",
      "her", "him", "his", "how", "its", "let", "may", "not", "our", "out",
      "the", "too", "was", "who", "why", "you",
    ],
    // four letters
    [
      "also", "does", "find", "from", "have", "into", "more", "need", "show",
      "some", "tell", "that", "them", "this", "want", "what", "when", "will",
      "with", "your",
    ],
    // five letters and longer
    ["about", "looking", "notes", "search", "where", "which"],
  ].flat(),
);
// BM25
// NOTE(review): the scorer is not in this file. In the textbook BM25 formula
// k1 controls term-frequency saturation and b controls document-length
// normalization; confirm these constants are used that way.
/** BM25 `k1` parameter. */
export const BM25_K1 = 1.2;
/** BM25 `b` parameter. */
export const BM25_B = 0.75;
// Search
/**
 * Candidate limit for reranking.
 * NOTE(review): presumably the maximum number of results handed to the
 * reranker — confirm in the search pipeline.
 */
export const RERANK_CANDIDATE_LIMIT = 40;
/**
 * Token count associated with the rerank context.
 * NOTE(review): presumably the reranker's context/token budget per call —
 * confirm where it is consumed.
 */
export const RERANK_CONTEXT_TOKENS = 2048;
// Embedding templates (embeddinggemma format)
/**
 * Builds the embedding prompt for a search query.
 *
 * @param query - The query text; inserted verbatim after the prompt prefix.
 * @returns `task: search result | query: <query>`
 */
export const EMBED_QUERY_TEMPLATE = (query: string): string => {
  const prompt = `task: search result | query: ${query}`;
  return prompt;
};
/**
 * Builds the embedding prompt for a document in the embeddinggemma document
 * format: `title: <title> | text: <body>`.
 *
 * A blank title (empty or whitespace-only) is replaced with the literal
 * `none`, the placeholder the EmbeddingGemma prompt format specifies for
 * untitled documents. Without this, untitled documents produced a malformed
 * `title:  | text: ...` prompt. Non-blank titles are inserted unchanged.
 *
 * @param title - Document title; may be blank.
 * @param body - Document text; inserted verbatim.
 * @returns The prompt string to pass to the embedding model.
 */
export const EMBED_DOC_TEMPLATE = (title: string, body: string): string => {
  const promptTitle = title.trim() === "" ? "none" : title;
  return `title: ${promptTitle} | text: ${body}`;
};
// Model IDs for Transformers.js
// NOTE(review): the values look like Hugging Face Hub repository IDs
// ("owner/name"); the loading code is not in this file — confirm how they are
// resolved.
/** Model ID for the embedding model. */
export const MODEL_EMBEDDING = "shreyask/embeddinggemma-300m-ONNX";
/** Model ID for the reranker model. */
export const MODEL_RERANKER = "onnx-community/Qwen3-Reranker-0.6B-ONNX";
/** Model ID for the query-expansion model. */
export const MODEL_EXPANSION = "shreyask/qmd-query-expansion-1.7B-ONNX";
// Example queries; `intent` is optional and only set on some entries.
/**
 * Sample searches. Each entry has a `query` string and, optionally, an
 * `intent` string that accompanies it.
 */
export const EXAMPLE_QUERIES: Array<{ query: string; intent?: string }> = [
  {
    query: "API versioning best practices",
  },
  {
    query: "distributed consensus algorithms",
  },
  {
    query: "gradient descent optimization",
    intent: "training neural networks",
  },
  {
    query: "how did coffee spread around the world",
  },
  {
    query: "performance",
    intent: "web page load times",
  },
];