import { useState, useEffect, useCallback, useRef } from 'react';
import type { Document, Chunk, EmbeddedChunk, ModelState } from './types';
import { loadAllModels, isAllModelsReady } from './pipeline/models';
import { chunkDocument, extractTitle } from './pipeline/chunking';
import { embedDocChunksBatch } from './pipeline/embeddings';
import { BM25Index } from './pipeline/bm25';
import { runPipeline } from './pipeline/orchestrator';
import type { PipelineState } from './components/PipelineView';
import QueryInput from './components/QueryInput';
import ModelStatus from './components/ModelStatus';
import PipelineView from './components/PipelineView';
import DocumentManager from './components/DocumentManager';

// Sample corpus fetched from /eval-docs/ on first mount.
const SAMPLE_DOCS = [
  'api-design-principles.md',
  'distributed-systems-overview.md',
  'machine-learning-primer.md',
  'history-of-coffee.md',
];

// Static marketing copy rendered via <ShowcaseCard>.
const SHOWCASE_CARDS = [
  {
    title: 'Faithful to qmd',
    body: 'BM25, vector search, query expansion, RRF fusion, and reranking follow the upstream retrieval recipe instead of flattening everything into one model call.',
  },
  {
    title: 'Browser-native bits',
    body: 'Transformers.js and WebGPU run the pipeline locally, cache model weights in the browser, and expose each stage so the search system stays inspectable.',
  },
];

// Number of chunks embedded per call to embedDocChunksBatch while indexing.
const INDEX_BATCH_SIZE = 8;

// Every pipeline stage starts idle; handleSearch resets to this before each run.
const INITIAL_PIPELINE: PipelineState = {
  expansion: { status: 'idle' },
  search: { status: 'idle' },
  rrf: { status: 'idle' },
  rerank: { status: 'idle' },
  blend: { status: 'idle' },
};

/**
 * Merge `incoming` documents into `current`, keyed by `doc.id`.
 * A document in `incoming` replaces an existing one with the same id;
 * otherwise it is appended. Map insertion order preserves the original
 * ordering of `current` followed by newly seen ids from `incoming`.
 */
function upsertDocuments(current: Document[], incoming: Document[]): Document[] {
  const merged = new Map(current.map((doc) => [doc.id, doc]));
  for (const doc of incoming) {
    merged.set(doc.id, doc);
  }
  return [...merged.values()];
}

/** Presentational card for one SHOWCASE_CARDS entry. */
function ShowcaseCard({ title, body }: { title: string; body: string }) {
  // NOTE(review): the element tags in this file were stripped by an earlier
  // extraction pass; the markup below is reconstructed from the surviving
  // expression slots ({title}, {body}) — verify structure/class names.
  return (
    <div className="showcase-card">
      <h3>{title}</h3>
      <p>{body}</p>
    </div>
  );
}

function App() {
  // Load status for each of the three on-device models.
  const [models, setModels] = useState<ModelState[]>([
    { name: 'embedding', status: 'pending', progress: 0 },
    { name: 'reranker', status: 'pending', progress: 0 },
    { name: 'expansion', status: 'pending', progress: 0 },
  ]);
  const [documents, setDocuments] = useState<Document[]>([]);
  const [chunks, setChunks] = useState<Chunk[]>([]);
  const [embeddedChunks, setEmbeddedChunks] = useState<EmbeddedChunk[]>([]);
  const [bm25Index, setBm25Index] = useState<BM25Index | null>(null);
  const [pipeline, setPipeline] = useState<PipelineState>(INITIAL_PIPELINE);
  const [indexing, setIndexing] = useState(false);
  const [indexingProgress, setIndexingProgress] = useState({ completed: 0, total: 0 });
  const [query, setQuery] = useState('');
  const [intent, setIntent] = useState<string | undefined>();
  // Theme is seeded from the data-theme attribute set before React mounts.
  const [dark, setDark] = useState(
    () => document.documentElement.getAttribute('data-theme') === 'dark',
  );
  // Monotonically increasing id so a superseded search run stops applying
  // its stage events once a newer run has started.
  const searchRunIdRef = useRef(0);

  const embeddingReady = models.find((model) => model.name === 'embedding')?.status === 'ready';

  // Kick off model downloads once; progress events update the matching entry.
  useEffect(() => {
    loadAllModels((state) => {
      setModels((prev) => prev.map((model) => (model.name === state.name ? state : model)));
    }).catch(console.error);
  }, []);

  // Fetch the bundled sample documents and merge them into state.
  useEffect(() => {
    async function loadSampleDocs() {
      try {
        const loadedDocs = await Promise.all(
          SAMPLE_DOCS.map(async (filename) => {
            // BUG FIX: the URL previously contained a broken interpolation
            // ("$(...)" is not template-literal syntax) — use ${filename}.
            const response = await fetch(`/eval-docs/${filename}`);
            if (!response.ok) {
              throw new Error(`failed to load sample doc ${filename}: ${response.status}`);
            }
            const body = await response.text();
            const title = extractTitle(body, filename);
            return { id: filename, title, body, filepath: filename };
          }),
        );
        setDocuments((prev) => upsertDocuments(prev, loadedDocs));
      } catch (error) {
        console.error(error);
      }
    }
    loadSampleDocs();
  }, []);

  // Re-chunk and rebuild the BM25 index whenever the document set changes.
  useEffect(() => {
    if (documents.length === 0) {
      setChunks([]);
      setEmbeddedChunks([]);
      setBm25Index(null);
      setIndexing(false);
      setIndexingProgress({ completed: 0, total: 0 });
      return;
    }
    const nextChunks = documents.flatMap((doc) => chunkDocument(doc));
    setChunks(nextChunks);
    setBm25Index(new BM25Index(nextChunks));
  }, [documents]);

  // Embed chunks in batches once the embedding model is ready. The
  // `cancelled` flag drops stale async work if chunks change mid-flight.
  useEffect(() => {
    let cancelled = false;
    if (!embeddingReady || chunks.length === 0) {
      setEmbeddedChunks([]);
      setIndexing(false);
      setIndexingProgress({ completed: 0, total: chunks.length });
      return () => {
        cancelled = true;
      };
    }
    async function embedChunks() {
      setIndexing(true);
      setIndexingProgress({ completed: 0, total: chunks.length });
      const embedded: EmbeddedChunk[] = [];
      for (let i = 0; i < chunks.length; i += INDEX_BATCH_SIZE) {
        const batch = chunks.slice(i, i + INDEX_BATCH_SIZE);
        const embeddings = await embedDocChunksBatch(
          batch.map((chunk) => ({ title: chunk.title, text: chunk.text })),
        );
        if (cancelled) return;
        for (let j = 0; j < batch.length; j++) {
          const chunk = batch[j];
          const embedding = embeddings[j];
          if (!chunk || !embedding) continue;
          embedded.push({ ...chunk, embedding });
        }
        setIndexingProgress({
          completed: Math.min(i + batch.length, chunks.length),
          total: chunks.length,
        });
      }
      if (cancelled) return;
      setEmbeddedChunks(embedded);
      setIndexing(false);
    }
    embedChunks().catch((error) => {
      if (cancelled) return;
      console.error(error);
      setEmbeddedChunks([]);
      setIndexing(false);
    });
    return () => {
      cancelled = true;
    };
  }, [chunks, embeddingReady]);

  // Read uploaded files and upsert them as documents (file name doubles as id).
  const handleUpload = useCallback(async (files: FileList) => {
    const uploadedDocs = await Promise.all(
      Array.from(files).map(async (file) => {
        const body = await file.text();
        const title = extractTitle(body, file.name);
        return { id: file.name, title, body, filepath: file.name };
      }),
    );
    setDocuments((prev) => upsertDocuments(prev, uploadedDocs));
  }, []);

  // Upsert pasted text as a single document under the given filename.
  const handlePaste = useCallback((text: string, filename: string) => {
    const title = extractTitle(text, filename);
    setDocuments((prev) => upsertDocuments(prev, [
      { id: filename, title, body: text, filepath: filename },
    ]));
  }, []);

  // Run the full retrieval pipeline, streaming stage events into `pipeline`.
  // Events from a superseded run (older runId) are discarded.
  const handleSearch = useCallback(async (searchQuery: string, searchIntent?: string) => {
    if (!bm25Index || embeddedChunks.length === 0) return;
    const runId = ++searchRunIdRef.current;
    setQuery(searchQuery);
    setIntent(searchIntent);
    setPipeline(INITIAL_PIPELINE);
    const generator = runPipeline({
      query: searchQuery,
      intent: searchIntent,
      embeddedChunks,
      bm25Index,
    });
    for await (const event of generator) {
      if (searchRunIdRef.current !== runId) return;
      setPipeline((prev) => ({
        ...prev,
        [event.stage]: {
          status: event.status,
          ...('data' in event ? { data: event.data } : {}),
          ...('error' in event ? { error: event.error } : {}),
        },
      }));
    }
  }, [bm25Index, embeddedChunks]);

  const allReady = isAllModelsReady() && embeddedChunks.length > 0 && !indexing;

  // Flip the theme, mirror it onto <html data-theme> and persist it.
  const toggleTheme = useCallback(() => {
    setDark((prev) => {
      const next = !prev;
      document.documentElement.setAttribute('data-theme', next ? 'dark' : 'light');
      localStorage.setItem('qmd-theme', next ? 'dark' : 'light');
      return next;
    });
  }, []);

  // NOTE(review): the original JSX element tags were stripped from this file;
  // only text content, {' '} spacers, and expression slots survived. The
  // markup below is a faithful-as-possible reconstruction — verify element
  // structure, class names, and the qmd link target against the design.
  return (
    <div className="app">
      <header className="app-header">
        <p className="eyebrow">QMD in the browser</p>
        <h1>QMD Web Sandbox</h1>
        <button type="button" onClick={toggleTheme}>
          {dark ? 'Light mode' : 'Dark mode'}
        </button>
        <p>
          A browser-native sandbox that recreates the core{' '}
          <code>qmd</code>
          {' '}retrieval pipeline with Transformers.js, while making the local
          WebGPU execution path visible. Documents are chunked, embedded,
          searched, fused, reranked, and inspected entirely in the browser.
        </p>
        <div className="badges">
          {[
            { label: 'WebGPU', color: '#4285F4' },
            { label: 'Local cache', color: '#34a853' },
            { label: 'Transparent pipeline', color: '#00897b' },
          ].map(badge => (
            <span key={badge.label} className="badge" style={{ backgroundColor: badge.color }}>
              {badge.label}
            </span>
          ))}
        </div>
      </header>

      <section className="showcase">
        {SHOWCASE_CARDS.map((card) => (
          <ShowcaseCard key={card.title} title={card.title} body={card.body} />
        ))}
      </section>

      <ModelStatus models={models} />

      {indexing && (
        <p className="indexing-banner">
          Indexing local chunks in the browser ({indexingProgress.completed}/{indexingProgress.total})...
        </p>
      )}

      <QueryInput onSearch={handleSearch} disabled={!allReady} />

      {query && <PipelineView pipeline={pipeline} query={query} intent={intent} />}

      <DocumentManager
        documents={documents.map((doc) => ({ id: doc.id, title: doc.title, filepath: doc.filepath }))}
        onUpload={handleUpload}
        onPaste={handlePaste}
      />
    </div>
  );
}

export default App;