// ---------------------------------------------------------------------------
// Chunking
// ---------------------------------------------------------------------------

/** Chunk size, in tokens. */
export const CHUNK_SIZE_TOKENS = 900;
/** Overlap between chunks, in tokens (15% of CHUNK_SIZE_TOKENS). */
export const CHUNK_OVERLAP_TOKENS = 135; // 15%
/** Chunk size, in characters (CHUNK_SIZE_TOKENS at ~4 chars/token). */
export const CHUNK_SIZE_CHARS = 3600; // ~4 chars/token
/** Overlap between chunks, in characters (CHUNK_OVERLAP_TOKENS at ~4 chars/token). */
export const CHUNK_OVERLAP_CHARS = 540;

// ---------------------------------------------------------------------------
// RRF
// NOTE(review): presumably Reciprocal Rank Fusion; how the weights and the
// rank bonuses are applied is defined by the consumer, not in this file.
// ---------------------------------------------------------------------------

export const RRF_K = 60;
export const RRF_PRIMARY_WEIGHT = 2.0;
export const RRF_SECONDARY_WEIGHT = 1.0;
export const RRF_RANK1_BONUS = 0.05;
export const RRF_RANK2_BONUS = 0.02;

// ---------------------------------------------------------------------------
// Strong lexical match detection
// NOTE(review): the names suggest a minimum top score and a minimum gap to
// the runner-up; confirm against the call site.
// ---------------------------------------------------------------------------

export const STRONG_SIGNAL_MIN_SCORE = 0.85;
export const STRONG_SIGNAL_MIN_GAP = 0.15;

// ---------------------------------------------------------------------------
// Position-aware blending
// One RRF weight per position band (top 3, top 10, tail).
// NOTE(review): the complementary weight is presumably given to another
// score at the call site; confirm there.
// ---------------------------------------------------------------------------

export const BLEND_TOP3_RRF_WEIGHT = 0.75;
export const BLEND_TOP10_RRF_WEIGHT = 0.65;
export const BLEND_TAIL_RRF_WEIGHT = 0.5;

// ---------------------------------------------------------------------------
// Intent — matches qmd's weights from store.ts
// ---------------------------------------------------------------------------

export const INTENT_WEIGHT_CHUNK = 0.5;

/**
 * Common stop words filtered from intent strings before tokenization.
 * Ported from qmd's INTENT_STOP_WORDS (store.ts).
 *
 * Entries are lowercase and grouped by length purely for readability.
 */
export const INTENT_STOP_WORDS = new Set([
  // 2 letters
  "am", "an", "as", "at", "be", "by", "do", "he", "if", "in", "is", "it",
  "me", "my", "no", "of", "on", "or", "so", "to", "up", "us", "we",
  // 3 letters
  "all", "and", "any", "are", "but", "can", "did", "for", "get", "has",
  "her", "him", "his", "how", "its", "let", "may", "not", "our", "out",
  "the", "too", "was", "who", "why", "you",
  // 4 letters
  "also", "does", "find", "from", "have", "into", "more", "need", "show",
  "some", "tell", "that", "them", "this", "want", "what", "when", "will",
  "with", "your",
  // longer
  "about", "looking", "notes", "search", "where", "which",
]);

// ---------------------------------------------------------------------------
// BM25
// ---------------------------------------------------------------------------

export const BM25_K1 = 1.2;
export const BM25_B = 0.75;

// ---------------------------------------------------------------------------
// Search
// ---------------------------------------------------------------------------

export const RERANK_CANDIDATE_LIMIT = 40;
export const RERANK_CONTEXT_TOKENS = 2048;

// ---------------------------------------------------------------------------
// Embedding templates (embeddinggemma format)
// ---------------------------------------------------------------------------

/**
 * Wraps a search query in the query-side embedding prompt.
 *
 * @param query - Query text; interpolated verbatim, with no escaping or trimming.
 * @returns The string `task: search result | query: <query>`.
 */
export const EMBED_QUERY_TEMPLATE = (query: string): string =>
  `task: search result | query: ${query}`;

/**
 * Wraps a document in the document-side embedding prompt.
 *
 * @param title - Document title; interpolated verbatim.
 * @param body - Document text; interpolated verbatim.
 * @returns The string `title: <title> | text: <body>`.
 */
export const EMBED_DOC_TEMPLATE = (title: string, body: string): string =>
  `title: ${title} | text: ${body}`;

// ---------------------------------------------------------------------------
// Model IDs for Transformers.js
// ---------------------------------------------------------------------------

export const MODEL_EMBEDDING = "shreyask/embeddinggemma-300m-ONNX";
export const MODEL_RERANKER = "onnx-community/Qwen3-Reranker-0.6B-ONNX";
export const MODEL_EXPANSION = "shreyask/qmd-query-expansion-1.7B-ONNX";

// ---------------------------------------------------------------------------
// Example queries with optional intents
// ---------------------------------------------------------------------------

/** Sample queries; `intent` is present only on entries that supply one. */
export const EXAMPLE_QUERIES: { query: string; intent?: string }[] = [
  { query: "API versioning best practices" },
  { query: "distributed consensus algorithms" },
  { query: "gradient descent optimization", intent: "training neural networks" },
  { query: "how did coffee spread around the world" },
  { query: "performance", intent: "web page load times" },
];