shreyask Claude Opus 4.6 committed on
Commit
4f658bf
·
verified ·
1 Parent(s): 2e15698

add types and constants ported from QMD pipeline

Browse files

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

Files changed (2) hide show
  1. src/constants.ts +43 -0
  2. src/types.ts +100 -0
src/constants.ts ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // Chunking: chunk size and overlap, given both in tokens and in characters
2
+ export const CHUNK_SIZE_TOKENS = 900; // chunk size in tokens
3
+ export const CHUNK_OVERLAP_TOKENS = 135; // 15% of CHUNK_SIZE_TOKENS
4
+ export const CHUNK_SIZE_CHARS = 3600; // CHUNK_SIZE_TOKENS * 4, using the ~4 chars/token estimate
5
+ export const CHUNK_OVERLAP_CHARS = 540; // 15% of CHUNK_SIZE_CHARS (equivalently CHUNK_OVERLAP_TOKENS * 4)
6
+
7
+ // RRF fusion parameters (presumably Reciprocal Rank Fusion — confirm against the fusion code)
8
+ export const RRF_K = 60; // NOTE(review): presumably the k in 1 / (k + rank) — confirm
9
+ export const RRF_PRIMARY_WEIGHT = 2.0; // twice RRF_SECONDARY_WEIGHT; which lists count as "primary" is not specified here
10
+ export const RRF_SECONDARY_WEIGHT = 1.0;
11
+ export const RRF_RANK1_BONUS = 0.05; // NOTE(review): presumably a flat bonus for a rank-1 hit — confirm
12
+ export const RRF_RANK2_BONUS = 0.02; // NOTE(review): smaller than the rank-1 bonus; exact rank range it applies to is not specified here
13
+
14
+ // Score Blending: RRF-side weights per rank tier (presumably the rerank score gets 1 - weight — TODO confirm)
15
+ export const BLEND_TOP3_RRF = 0.75; // top-3 tier: highest RRF weight of the three
16
+ export const BLEND_MID_RRF = 0.6; // NOTE(review): rank boundaries of the "mid" tier are not defined here
17
+ export const BLEND_TAIL_RRF = 0.4; // tail tier: lowest RRF weight of the three
18
+
19
+ // BM25 parameters (presumably the standard k1 and b of the BM25 formula — confirm against the scorer)
20
+ export const BM25_K1 = 1.2;
21
+ export const BM25_B = 0.75;
22
+
23
+ // Search: limits for the rerank step
24
+ export const RERANK_CANDIDATE_LIMIT = 40; // NOTE(review): presumably the max number of candidates sent to the reranker — confirm
25
+ export const RERANK_CONTEXT_TOKENS = 2048; // NOTE(review): presumably a token budget per reranker input — confirm
26
+
27
+ // Embedding templates (embeddinggemma format): build the prompt strings for queries and documents
28
+ export const EMBED_QUERY_TEMPLATE = (query: string) =>
29
+ `task: search result | query: ${query}`; // `query` is interpolated as-is (no trimming or escaping)
30
+ export const EMBED_DOC_TEMPLATE = (title: string, body: string) =>
31
+ `title: ${title} | text: ${body}`; // `title` and `body` are interpolated as-is (no trimming or escaping)
32
+
33
+ // Model IDs for Transformers.js (org/repo style identifiers)
34
+ export const MODEL_EMBEDDING = "onnx-community/embeddinggemma-300m-ONNX"; // embeddinggemma: the format the EMBED_* templates target
35
+ export const MODEL_RERANKER = "onnx-community/Qwen3-Reranker-0.6B-ONNX";
36
+ export const MODEL_EXPANSION = "shreyask/qmd-query-expansion-1.7B-ONNX"; // placeholder — NOTE(review): confirm the final repo ID
37
+
38
+ // Example queries: three sample search strings (where they are used is not shown here)
39
+ export const EXAMPLE_QUERIES = [
40
+ "API versioning best practices",
41
+ "distributed consensus algorithms",
42
+ "gradient descent optimization",
43
+ ];
src/types.ts ADDED
@@ -0,0 +1,100 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // A document loaded into the demo
2
+ export interface Document { // NOTE(review): shadows the global DOM `Document` type in modules that import this name
3
+ id: string; // filename-based identifier
4
+ title: string; // extracted from the H1 heading or the filename (fallback order presumably H1 first — confirm)
5
+ body: string; // full text content
6
+ filepath: string; // e.g. "api-design-principles.md"
7
+ }
8
+
9
+ // A chunk of a document
10
+ export interface Chunk {
11
+ docId: string; // presumably the parent Document's id — confirm
12
+ chunkIndex: number; // NOTE(review): presumably the chunk's position within its document; base (0 or 1) not specified here
13
+ text: string; // the chunk's text
14
+ startChar: number; // character offset of the chunk in the original doc
15
+ title: string; // parent doc title (copied onto every chunk)
16
+ }
17
+
18
+ // A chunk with its embedding vector (all Chunk fields plus `embedding`)
19
+ export interface EmbeddedChunk extends Chunk {
20
+ embedding: Float32Array; // vector length is not fixed by this type
21
+ }
22
+
23
+ // Search result from BM25 or vector search: one chunk plus its retrieval score
24
+ export interface ScoredChunk {
25
+ chunk: Chunk;
26
+ score: number; // retriever score; scale and range are not specified by this type
27
+ source: "bm25" | "vector"; // which of the two searches produced this hit
28
+ }
29
+
30
+ // Result after RRF fusion (keyed by docId rather than by individual chunk)
31
+ export interface RRFResult {
32
+ docId: string;
33
+ filepath: string;
34
+ title: string;
35
+ bestChunk: string; // NOTE(review): presumably the text of the best-matching chunk — confirm
36
+ score: number; // fused score; presumably the sum of the contributions plus any bonuses — confirm
37
+ contributions: RRFContribution[]; // per-list breakdown of how the score was built
38
+ }
39
+
40
+ export interface RRFContribution { // one entry of RRFResult.contributions
41
+ source: "bm25" | "vector"; // same source tags as ScoredChunk.source
42
+ queryType: "original" | "lex" | "vec" | "hyde"; // "original", or one of the ExpandedQuery kinds (lex / vec / hyde)
43
+ query: string; // presumably the query text that was searched — confirm
44
+ rank: number; // NOTE(review): confirm whether rank is 0- or 1-based
45
+ weight: number; // presumably one of the RRF_*_WEIGHT values — confirm
46
+ rrfContribution: number; // presumably weight / (k + rank) — TODO confirm against the fusion code
47
+ }
48
+
49
+ // Result after reranking (all RRFResult fields plus the two scores below)
50
+ export interface RerankedResult extends RRFResult {
51
+ rerankScore: number; // score from the reranker; scale not specified by this type
52
+ blendedScore: number; // presumably a mix of the RRF score and rerankScore — confirm
53
+ }
54
+
55
+ // Final result (same fields as RRFResult minus `contributions`)
56
+ export interface FinalResult {
57
+ filepath: string;
58
+ title: string;
59
+ bestChunk: string;
60
+ score: number; // blended score
61
+ docId: string;
62
+ }
63
+
64
+ // Query expansion output (note: `vec` is a list, `hyde` and `lex` are single strings)
65
+ export interface ExpandedQuery {
66
+ hyde: string; // hypothetical document snippet
67
+ vec: string[]; // dense retrieval sentences (zero or more)
68
+ lex: string; // BM25 keywords, as one string
69
+ }
70
+
71
+ // Pipeline events for React UI: the stage and status vocabularies
72
+ export type PipelineStage = "expansion" | "search" | "rrf" | "rerank" | "blend";
73
+ export type PipelineStatus = "idle" | "running" | "done" | "error"; // "idle" is not used by any PipelineEvent variant
74
+
75
+ export type PipelineEvent = // union tagged by stage + status; only "done" variants carry `data`
76
+ | { stage: "expansion"; status: "running" }
77
+ | { stage: "expansion"; status: "done"; data: ExpandedQuery }
78
+ | { stage: "expansion"; status: "error"; error: string } // the only "error" variant; other stages have none
79
+ | { stage: "search"; status: "running" }
80
+ | {
81
+ stage: "search";
82
+ status: "done";
83
+ data: { bm25Hits: ScoredChunk[]; vectorHits: ScoredChunk[] }; // hits from the two searches, kept separate
84
+ }
85
+ | { stage: "rrf"; status: "done"; data: { merged: RRFResult[] } } // "rrf" has no "running" variant
86
+ | { stage: "rerank"; status: "running" }
87
+ | {
88
+ stage: "rerank";
89
+ status: "done";
90
+ data: { before: RRFResult[]; after: RerankedResult[] }; // both lists, before and after reranking
91
+ }
92
+ | { stage: "blend"; status: "done"; data: { finalResults: FinalResult[] } }; // "blend" has no "running" variant
93
+
94
+ // Model loading state
95
+ export interface ModelState {
96
+ name: string; // NOTE(review): display name or model ID — not specified here
97
+ status: "pending" | "downloading" | "loading" | "ready" | "error";
98
+ progress: number; // fraction from 0 to 1
99
+ error?: string; // optional; presumably set only when status is "error" — confirm
100
+ }