File size: 2,546 Bytes
4f658bf
 
 
 
 
 
 
 
 
 
 
 
 
eb89325
 
 
 
 
 
ac50275
 
eb89325
4f658bf
8a8f6ee
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4f658bf
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a0d92b7
4f658bf
0ecf8a9
4f658bf
8a8f6ee
 
 
 
 
 
 
4f658bf
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
// Chunking
// Chunk size and overlap are given in two units: tokens and characters.
// The character values equal the token values multiplied by 4, following the
// "~4 chars/token" estimate noted below.
export const CHUNK_SIZE_TOKENS = 900;
export const CHUNK_OVERLAP_TOKENS = 135; // 15% of CHUNK_SIZE_TOKENS
export const CHUNK_SIZE_CHARS = 3600; // ~4 chars/token (900 * 4)
export const CHUNK_OVERLAP_CHARS = 540; // 135 * 4, i.e. 15% of CHUNK_SIZE_CHARS

// RRF
// NOTE(review): "RRF" presumably stands for Reciprocal Rank Fusion. The code
// that consumes these values is not in this file, so the per-constant notes
// below are inferred from the names only — confirm against the caller.
export const RRF_K = 60; // presumably the k in a 1 / (k + rank) term — TODO confirm
export const RRF_PRIMARY_WEIGHT = 2.0; // twice RRF_SECONDARY_WEIGHT
export const RRF_SECONDARY_WEIGHT = 1.0;
export const RRF_RANK1_BONUS = 0.05; // presumably an extra score for a rank-1 hit — TODO confirm
export const RRF_RANK2_BONUS = 0.02; // smaller than the rank-1 bonus

// Strong lexical match detection
// NOTE(review): the detection logic is not in this file. By name, these look
// like a minimum top score and a minimum lead over the next result — confirm
// the score scale and the comparison used by the caller.
export const STRONG_SIGNAL_MIN_SCORE = 0.85;
export const STRONG_SIGNAL_MIN_GAP = 0.15;

// Position-aware blending
// The RRF weight decreases across the three tiers (0.75 > 0.65 > 0.5).
// NOTE(review): presumably the tiers are ranks 1-3, ranks 4-10 and the rest,
// with the remaining weight (1 - value) given to another score such as the
// reranker's — the blending code is not in this file, so confirm there.
export const BLEND_TOP3_RRF_WEIGHT = 0.75;
export const BLEND_TOP10_RRF_WEIGHT = 0.65;
export const BLEND_TAIL_RRF_WEIGHT = 0.5;


// Intent — matches qmd's weights from store.ts
// NOTE(review): presumably the weight given to intent-term matches when
// scoring chunks; the scoring code is not in this file — confirm.
export const INTENT_WEIGHT_CHUNK = 0.5;

// Stop words to drop from intent strings before they are tokenized.
// Ported from qmd's INTENT_STOP_WORDS (store.ts).
//
// Each string below is one space-separated group of lowercase words, ordered
// by word length; the groups are split and flattened into a single Set.
export const INTENT_STOP_WORDS = new Set(
  [
    // 2 letters
    "am an as at be by do he if in is it me my no of on or so to up us we",
    // 3 letters
    "all and any are but can did for get has her him his how its let may not our out the too was who why you",
    // 4 letters
    "also does find from have into more need show some tell that them this want what when will with your",
    // 5+ letters
    "about looking notes search where which",
  ].flatMap((group) => group.split(" ")),
);

// BM25
// NOTE(review): by name these are the two standard BM25 parameters — k1
// (term-frequency saturation) and b (document-length normalization). The
// scoring code that reads them is not in this file — confirm usage there.
export const BM25_K1 = 1.2;
export const BM25_B = 0.75;

// Search
// NOTE(review): presumably the number of candidates passed to the reranker
// and the reranker's context budget in tokens; the search pipeline is not in
// this file — confirm against the caller.
export const RERANK_CANDIDATE_LIMIT = 40;
export const RERANK_CONTEXT_TOKENS = 2048;

// Embedding templates (embeddinggemma format)
/**
 * Wraps a search query in the query-side embedding prompt.
 *
 * @param query - Raw query text; inserted as-is, with no trimming or escaping.
 * @returns The string `task: search result | query: <query>`.
 */
export const EMBED_QUERY_TEMPLATE = (query: string): string => {
  const prefix = "task: search result | query: ";
  return prefix + query;
};
/**
 * Wraps a document in the document-side embedding prompt.
 *
 * The prompt format expects the literal word `none` when a document has no
 * title, so a blank (empty or whitespace-only) title is replaced with `none`
 * instead of producing `title:  | text: ...`. Non-blank titles are inserted
 * unchanged.
 *
 * @param title - Document title; may be blank.
 * @param body - Document text; inserted as-is.
 * @returns The string `title: <title or "none"> | text: <body>`.
 */
export const EMBED_DOC_TEMPLATE = (title: string, body: string): string => {
  const effectiveTitle = title.trim().length === 0 ? "none" : title;
  return `title: ${effectiveTitle} | text: ${body}`;
};

// Model IDs for Transformers.js
// NOTE(review): the "owner/name" strings look like Hugging Face Hub repository
// ids; the loading code is not in this file — confirm how they are resolved.
export const MODEL_EMBEDDING = "shreyask/embeddinggemma-300m-ONNX"; // embedding model
export const MODEL_RERANKER = "onnx-community/Qwen3-Reranker-0.6B-ONNX"; // reranker model
export const MODEL_EXPANSION = "shreyask/qmd-query-expansion-1.7B-ONNX"; // query-expansion model

// Sample queries. Each entry has a `query` string and, for some entries, an
// `intent` string; entries without an intent simply omit the field.
export const EXAMPLE_QUERIES: Array<{ query: string; intent?: string }> = [
  {
    query: "API versioning best practices",
  },
  {
    query: "distributed consensus algorithms",
  },
  {
    query: "gradient descent optimization",
    intent: "training neural networks",
  },
  {
    query: "how did coffee spread around the world",
  },
  {
    query: "performance",
    intent: "web page load times",
  },
];