| import { useState, useEffect, useCallback, useRef } from 'react'; | |
| import type { Document, Chunk, EmbeddedChunk, ModelState } from './types'; | |
| import { loadAllModels, isAllModelsReady } from './pipeline/models'; | |
| import { chunkDocument, extractTitle } from './pipeline/chunking'; | |
| import { embedDocChunksBatch } from './pipeline/embeddings'; | |
| import { BM25Index } from './pipeline/bm25'; | |
| import { runPipeline } from './pipeline/orchestrator'; | |
| import type { PipelineState } from './components/PipelineView'; | |
| import QueryInput from './components/QueryInput'; | |
| import ModelStatus from './components/ModelStatus'; | |
| import PipelineView from './components/PipelineView'; | |
| import DocumentManager from './components/DocumentManager'; | |
/**
 * File names of the bundled sample documents.
 *
 * Each entry is fetched from `/eval-docs/<name>` when the app mounts and is
 * also used as the resulting document's `id` and `filepath`.
 */
const SAMPLE_DOCS: readonly string[] = [
  'api-design-principles.md',
  'distributed-systems-overview.md',
  'machine-learning-primer.md',
  'history-of-coffee.md',
];
/**
 * Static copy for the showcase cards rendered in the page header.
 * `title` doubles as the React key, so titles must stay unique.
 */
const SHOWCASE_CARDS: ReadonlyArray<{ readonly title: string; readonly body: string }> = [
  {
    title: 'Faithful to qmd',
    body: 'BM25, vector search, query expansion, RRF fusion, and reranking follow the upstream retrieval recipe instead of flattening everything into one model call.',
  },
  {
    title: 'Browser-native bits',
    body: 'Transformers.js and WebGPU run the pipeline locally, cache model weights in the browser, and expose each stage so the search system stays inspectable.',
  },
];
/** Number of chunks handed to `embedDocChunksBatch` per call while indexing. */
const INDEX_BATCH_SIZE = 8;
/**
 * Pipeline state with every stage idle.
 *
 * Used as the initial `pipeline` state and re-applied at the start of each
 * search so results from a previous run are cleared. The object is shared by
 * reference, so it must never be mutated in place.
 */
const INITIAL_PIPELINE: PipelineState = {
  expansion: { status: 'idle' },
  search: { status: 'idle' },
  rrf: { status: 'idle' },
  rerank: { status: 'idle' },
  blend: { status: 'idle' },
};
/**
 * Merges `incoming` documents into `current`, keyed by `id`.
 *
 * A document whose `id` already exists replaces the stored one but keeps its
 * original position; documents with new ids are appended in the order given.
 * Neither input array is mutated.
 *
 * @param current - Documents already held in state.
 * @param incoming - Documents to insert or overwrite.
 * @returns A new array containing one document per distinct `id`.
 */
function upsertDocuments(current: readonly Document[], incoming: readonly Document[]): Document[] {
  // Map preserves first-insertion order, which gives the stable ordering described above.
  const merged = new Map<Document['id'], Document>();
  for (const doc of current) {
    merged.set(doc.id, doc);
  }
  for (const doc of incoming) {
    merged.set(doc.id, doc);
  }
  return [...merged.values()];
}
/**
 * Presentational card: an uppercase accent-colored title above a paragraph of
 * body text. Purely driven by its props; holds no state.
 *
 * @param title - Short heading shown in the accent color.
 * @param body - Descriptive text rendered beneath the heading.
 */
function ShowcaseCard({ title, body }: { title: string; body: string }) {
  return (
    <div
      style={{
        padding: '0.9rem 1rem',
        background: 'var(--bg-card)',
        border: '1px solid var(--border)',
        borderRadius: '10px',
        boxShadow: '0 2px 12px var(--shadow)',
      }}
    >
      <div
        style={{
          marginBottom: '0.35rem',
          fontSize: '0.74rem',
          fontWeight: 700,
          letterSpacing: '0.08em',
          textTransform: 'uppercase',
          color: '#4285F4',
        }}
      >
        {title}
      </div>
      <p
        style={{
          margin: 0,
          fontSize: '0.84rem',
          lineHeight: 1.6,
          color: 'var(--text-secondary)',
        }}
      >
        {body}
      </p>
    </div>
  );
}
/** Pill badges shown under the header blurb; hoisted so the array is not rebuilt on every render. */
const HEADER_BADGES: ReadonlyArray<{ readonly label: string; readonly color: string }> = [
  { label: 'WebGPU', color: '#4285F4' },
  { label: 'Local cache', color: '#34a853' },
  { label: 'Transparent pipeline', color: '#00897b' },
];

/**
 * Fetches one bundled sample document from `/eval-docs/<filename>`.
 *
 * @param filename - File name under `/eval-docs/`; also used as the document's `id` and `filepath`.
 * @returns The document, with its title obtained from `extractTitle(body, filename)`.
 * @throws Error when the server answers with a non-2xx status, so that an
 *   error page is never indexed as if it were document content.
 */
async function fetchSampleDoc(filename: string): Promise<Document> {
  const response = await fetch(`/eval-docs/${filename}`);
  if (!response.ok) {
    throw new Error(
      `Failed to load sample document "${filename}": ${response.status} ${response.statusText}`,
    );
  }
  const body = await response.text();
  const title = extractTitle(body, filename);
  return { id: filename, title, body, filepath: filename };
}

/**
 * Root component of the sandbox.
 *
 * Owns all application state and wires it together:
 * - starts model loading on mount and tracks per-model status,
 * - fetches the sample documents and accepts uploaded / pasted ones,
 * - re-chunks documents and rebuilds the BM25 index whenever the document set changes,
 * - embeds chunks in batches once the embedding model reports `ready`,
 * - runs the search pipeline and streams its stage events into `PipelineView`.
 */
function App() {
  const [models, setModels] = useState<ModelState[]>([
    { name: 'embedding', status: 'pending', progress: 0 },
    { name: 'reranker', status: 'pending', progress: 0 },
    { name: 'expansion', status: 'pending', progress: 0 },
  ]);
  const [documents, setDocuments] = useState<Document[]>([]);
  const [chunks, setChunks] = useState<Chunk[]>([]);
  const [embeddedChunks, setEmbeddedChunks] = useState<EmbeddedChunk[]>([]);
  const [bm25Index, setBm25Index] = useState<BM25Index | null>(null);
  const [pipeline, setPipeline] = useState<PipelineState>(INITIAL_PIPELINE);
  const [indexing, setIndexing] = useState(false);
  const [indexingProgress, setIndexingProgress] = useState({ completed: 0, total: 0 });
  const [query, setQuery] = useState('');
  const [intent, setIntent] = useState<string | undefined>();
  const [dark, setDark] = useState(
    () => document.documentElement.getAttribute('data-theme') === 'dark',
  );

  // Monotonic id of the most recent search; lets a superseded run detect that it is stale.
  const searchRunIdRef = useRef(0);

  const embeddingReady = models.find((model) => model.name === 'embedding')?.status === 'ready';

  // Kick off model loading once; each progress callback replaces the matching entry by name.
  useEffect(() => {
    loadAllModels((state) => {
      setModels((prev) => prev.map((model) => (model.name === state.name ? state : model)));
    }).catch(console.error);
  }, []);

  // Load the bundled sample documents once on mount.
  useEffect(() => {
    let cancelled = false;

    async function loadSampleDocs() {
      // allSettled: one missing or broken sample must not discard the ones that loaded.
      const settled = await Promise.allSettled(SAMPLE_DOCS.map(fetchSampleDoc));
      if (cancelled) return;

      const loadedDocs: Document[] = [];
      for (const result of settled) {
        if (result.status === 'fulfilled') {
          loadedDocs.push(result.value);
        } else {
          console.error(result.reason);
        }
      }
      if (loadedDocs.length > 0) {
        setDocuments((prev) => upsertDocuments(prev, loadedDocs));
      }
    }

    loadSampleDocs().catch(console.error);

    return () => {
      // Avoid touching state after unmount.
      cancelled = true;
    };
  }, []);

  // Re-chunk and rebuild the BM25 index whenever the document set changes.
  useEffect(() => {
    if (documents.length === 0) {
      setChunks([]);
      setEmbeddedChunks([]);
      setBm25Index(null);
      setIndexing(false);
      setIndexingProgress({ completed: 0, total: 0 });
      return;
    }
    const nextChunks = documents.flatMap((doc) => chunkDocument(doc));
    setChunks(nextChunks);
    setBm25Index(new BM25Index(nextChunks));
  }, [documents]);

  // Embed all chunks in batches once the embedding model is ready.
  useEffect(() => {
    // Set by the cleanup so an in-flight run stops writing state once
    // `chunks` / `embeddingReady` change or the component unmounts.
    let cancelled = false;
    const cancel = () => {
      cancelled = true;
    };

    if (!embeddingReady || chunks.length === 0) {
      setEmbeddedChunks([]);
      setIndexing(false);
      setIndexingProgress({ completed: 0, total: chunks.length });
      return cancel;
    }

    async function embedChunks() {
      setIndexing(true);
      setIndexingProgress({ completed: 0, total: chunks.length });

      const embedded: EmbeddedChunk[] = [];
      for (let i = 0; i < chunks.length; i += INDEX_BATCH_SIZE) {
        const batch = chunks.slice(i, i + INDEX_BATCH_SIZE);
        const embeddings = await embedDocChunksBatch(
          batch.map((chunk) => ({ title: chunk.title, text: chunk.text })),
        );
        if (cancelled) return;

        for (let j = 0; j < batch.length; j++) {
          const chunk = batch[j];
          const embedding = embeddings[j];
          // Skip positions for which no embedding came back.
          if (!chunk || !embedding) continue;
          embedded.push({ ...chunk, embedding });
        }

        setIndexingProgress({
          completed: Math.min(i + batch.length, chunks.length),
          total: chunks.length,
        });
      }

      if (cancelled) return;
      // Publish the full set only after every batch has finished.
      setEmbeddedChunks(embedded);
      setIndexing(false);
    }

    embedChunks().catch((error) => {
      if (cancelled) return;
      console.error(error);
      setEmbeddedChunks([]);
      setIndexing(false);
    });

    return cancel;
  }, [chunks, embeddingReady]);

  // Read uploaded files as text and upsert them; the file name is the document id.
  const handleUpload = useCallback(async (files: FileList) => {
    const uploadedDocs = await Promise.all(
      Array.from(files).map(async (file) => {
        const body = await file.text();
        const title = extractTitle(body, file.name);
        return { id: file.name, title, body, filepath: file.name };
      }),
    );
    setDocuments((prev) => upsertDocuments(prev, uploadedDocs));
  }, []);

  // Upsert a pasted document; `filename` is used as its id.
  const handlePaste = useCallback((text: string, filename: string) => {
    const title = extractTitle(text, filename);
    setDocuments((prev) =>
      upsertDocuments(prev, [{ id: filename, title, body: text, filepath: filename }]),
    );
  }, []);

  // Run the pipeline for a query and stream each stage event into `pipeline` state.
  const handleSearch = useCallback(
    async (searchQuery: string, searchIntent?: string) => {
      if (!bm25Index || embeddedChunks.length === 0) return;

      const runId = ++searchRunIdRef.current;
      setQuery(searchQuery);
      setIntent(searchIntent);
      setPipeline(INITIAL_PIPELINE);

      const generator = runPipeline({
        query: searchQuery,
        intent: searchIntent,
        embeddedChunks,
        bm25Index,
      });

      try {
        for await (const event of generator) {
          // A newer search has started; drop this run's remaining events.
          if (searchRunIdRef.current !== runId) return;
          setPipeline((prev) => ({
            ...prev,
            [event.stage]: {
              status: event.status,
              ...('data' in event ? { data: event.data } : {}),
              ...('error' in event ? { error: event.error } : {}),
            },
          }));
        }
      } catch (error) {
        // The result is not awaited by an error-handling caller in this file,
        // so report here instead of leaving an unhandled promise rejection.
        if (searchRunIdRef.current !== runId) return;
        console.error(error);
      }
    },
    [bm25Index, embeddedChunks],
  );

  const allReady = isAllModelsReady() && embeddedChunks.length > 0 && !indexing;

  // Flip the theme and mirror it to the <html> data-theme attribute and localStorage.
  // Side effects live in the handler, not in a state updater, because updaters must be pure.
  const toggleTheme = useCallback(() => {
    const next = !dark;
    const theme = next ? 'dark' : 'light';
    document.documentElement.setAttribute('data-theme', theme);
    try {
      localStorage.setItem('qmd-theme', theme);
    } catch (error) {
      // Storage can be unavailable or full; the theme still applies for this session.
      console.warn(error);
    }
    setDark(next);
  }, [dark]);

  return (
    <div
      style={{
        fontFamily: 'system-ui, -apple-system, sans-serif',
        maxWidth: 1400,
        margin: '0 auto',
        padding: '1.25rem 1rem 2rem',
      }}
    >
      <style>{`
        .showcase-grid {
          display: grid;
          grid-template-columns: repeat(2, minmax(0, 1fr));
          gap: 0.85rem;
          margin-top: 1rem;
        }
        @media (max-width: 900px) {
          .showcase-grid {
            grid-template-columns: 1fr;
          }
        }
      `}</style>
      <header style={{ marginBottom: '1.5rem' }}>
        <div
          style={{
            display: 'flex',
            alignItems: 'flex-start',
            justifyContent: 'space-between',
            gap: '1rem',
          }}
        >
          <div style={{ flex: 1 }}>
            <div
              style={{
                marginBottom: '0.4rem',
                fontSize: '0.74rem',
                fontWeight: 700,
                letterSpacing: '0.08em',
                textTransform: 'uppercase',
                color: '#4285F4',
              }}
            >
              QMD in the browser
            </div>
            <h1 style={{ margin: 0, fontSize: '1.7rem', color: 'var(--text)' }}>
              QMD Web Sandbox
            </h1>
            <p
              style={{
                margin: '0.45rem 0 0',
                color: 'var(--text-secondary)',
                fontSize: '0.9rem',
                lineHeight: 1.65,
                maxWidth: 860,
              }}
            >
              A browser-native sandbox that recreates the core{' '}
              <a
                href="https://github.com/tobi/qmd"
                target="_blank"
                rel="noopener noreferrer"
                style={{ color: '#4285F4', textDecoration: 'none' }}
              >
                qmd
              </a>
              {' '}retrieval pipeline with Transformers.js, while making the local WebGPU execution path visible.
              Documents are chunked, embedded, searched, fused, reranked, and inspected entirely in the browser.
            </p>
            <div
              style={{
                marginTop: '0.7rem',
                display: 'inline-flex',
                alignItems: 'center',
                gap: '0.35rem',
                flexWrap: 'wrap',
              }}
            >
              {HEADER_BADGES.map((badge) => (
                <span
                  key={badge.label}
                  style={{
                    padding: '0.25rem 0.55rem',
                    borderRadius: '999px',
                    border: `1px solid ${badge.color}30`,
                    background: `${badge.color}10`,
                    color: badge.color,
                    fontSize: '0.72rem',
                    fontWeight: 600,
                    fontFamily: 'system-ui, -apple-system, sans-serif',
                    whiteSpace: 'nowrap',
                  }}
                >
                  {badge.label}
                </span>
              ))}
            </div>
          </div>
          <div style={{ display: 'flex', alignItems: 'center', gap: '0.75rem', flexShrink: 0 }}>
            <a
              href="https://github.com/tobi/qmd"
              target="_blank"
              rel="noopener noreferrer"
              style={{
                fontSize: '0.78rem',
                color: 'var(--text-secondary)',
                textDecoration: 'none',
                padding: '0.35rem 0.7rem',
                border: '1px solid var(--border)',
                borderRadius: '999px',
                fontFamily: 'system-ui, -apple-system, sans-serif',
                background: 'var(--bg-card)',
              }}
              onMouseEnter={(event) => {
                event.currentTarget.style.color = '#4285F4';
              }}
              onMouseLeave={(event) => {
                event.currentTarget.style.color = 'var(--text-secondary)';
              }}
            >
              Original qmd
            </a>
            <button
              type="button"
              onClick={toggleTheme}
              title={dark ? 'Switch to light mode' : 'Switch to dark mode'}
              aria-label={dark ? 'Switch to light mode' : 'Switch to dark mode'}
              style={{
                background: 'var(--bg-card)',
                border: '1px solid var(--border)',
                borderRadius: '999px',
                padding: '0.35rem 0.6rem',
                cursor: 'pointer',
                fontSize: '1rem',
                lineHeight: 1,
                color: 'var(--text)',
              }}
            >
              {dark ? '\u2600' : '\u263E'}
            </button>
          </div>
        </div>
        <div className="showcase-grid">
          {SHOWCASE_CARDS.map((card) => (
            <ShowcaseCard key={card.title} title={card.title} body={card.body} />
          ))}
        </div>
      </header>
      <ModelStatus models={models} />
      {indexing && (
        <div
          style={{
            padding: '0.6rem 1rem',
            background: 'var(--indexing-bg)',
            borderRadius: 8,
            marginBottom: '1rem',
            fontSize: '0.84rem',
            color: 'var(--text)',
            border: '1px solid var(--border)',
          }}
        >
          Indexing local chunks in the browser ({indexingProgress.completed}/{indexingProgress.total})...
        </div>
      )}
      <QueryInput onSearch={handleSearch} disabled={!allReady} />
      {query && <PipelineView state={pipeline} query={query} intent={intent} />}
      <DocumentManager
        documents={documents.map((doc) => ({ id: doc.id, title: doc.title, filepath: doc.filepath }))}
        onUpload={handleUpload}
        onPaste={handlePaste}
      />
    </div>
  );
}

export default App;