| | |
| | <!DOCTYPE html> |
| | <html lang="en"> |
| | <head> |
| | <meta charset="UTF-8" /> |
| | <meta name="viewport" content="width=device-width, initial-scale=1.0" /> |
| | <title>Vector Search Methods Comparison</title> |
| | <style> |
| | body { |
| | font-family: "Segoe UI", Tahoma, Geneva, Verdana, sans-serif; |
| | line-height: 1.6; |
| | color: #333; |
| | max-width: 1200px; |
| | margin: 0 auto; |
| | padding: 20px; |
| | background-color: #f5f7fa; |
| | } |
| | |
| | h1, |
| | h2, |
| | h3 { |
| | color: #2c3e50; |
| | } |
| | |
| | h1 { |
| | text-align: center; |
| | margin-bottom: 40px; |
| | font-size: 2.2em; |
| | border-bottom: 2px solid #3498db; |
| | padding-bottom: 10px; |
| | } |
| | |
| | .container { |
| | display: flex; |
| | flex-wrap: wrap; |
| | gap: 20px; |
| | justify-content: center; |
| | } |
| | |
| | .search-type { |
| | flex: 1 1 500px; |
| | background: white; |
| | border-radius: 8px; |
| | box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1); |
| | margin-bottom: 30px; |
| | overflow: hidden; |
| | transition: transform 0.2s; |
| | } |
| | |
| | .search-type:hover { |
| | transform: translateY(-5px); |
| | } |
| | |
| | .search-header { |
| | padding: 15px 20px; |
| | color: white; |
| | font-weight: bold; |
| | font-size: 1.2em; |
| | } |
| | |
| | .search-content { |
| | padding: 20px; |
| | position: relative; |
| | } |
| | |
| | .enn .search-header { |
| | background-color: #3498db; |
| | } |
| | |
| | .ann .search-header { |
| | background-color: #e74c3c; |
| | } |
| | |
| | .semantic .search-header { |
| | background-color: #2ecc71; |
| | } |
| | |
| | .sparse .search-header { |
| | background-color: #9b59b6; |
| | } |
| | |
| | .canvas-container { |
| | position: relative; |
| | height: 300px; |
| | width: 100%; |
| | background: #f8f9fa; |
| | border: 1px solid #ddd; |
| | border-radius: 4px; |
| | margin-bottom: 15px; |
| | overflow: hidden; |
| | } |
| | |
| | canvas { |
| | display: block; |
| | } |
| | |
| | .controls { |
| | display: flex; |
| | justify-content: space-between; |
| | margin-bottom: 15px; |
| | flex-wrap: wrap; |
| | gap: 10px; |
| | } |
| | |
| | select, |
| | button { |
| | padding: 8px 12px; |
| | border-radius: 4px; |
| | border: 1px solid #ccc; |
| | background: white; |
| | font-size: 14px; |
| | } |
| | |
| | button { |
| | background: #3498db; |
| | color: white; |
| | border: none; |
| | cursor: pointer; |
| | transition: background 0.2s; |
| | } |
| | |
| | button:hover { |
| | background: #2980b9; |
| | } |
| | |
| | .step-display { |
| | background: #f0f4f8; |
| | padding: 15px; |
| | border-radius: 4px; |
| | margin-top: 15px; |
| | font-size: 14px; |
| | } |
| | |
| | .step-title { |
| | font-weight: bold; |
| | margin-bottom: 8px; |
| | } |
| | |
| | .step-description { |
| | color: #555; |
| | } |
| | |
| | ul.features { |
| | padding-left: 20px; |
| | } |
| | |
| | .features li { |
| | margin-bottom: 5px; |
| | } |
| | |
| | .distance-formula { |
| | font-style: italic; |
| | background: #f0f0f0; |
| | padding: 5px; |
| | border-radius: 4px; |
| | margin: 5px 0; |
| | display: inline-block; |
| | } |
| | |
| | .tooltip { |
| | position: absolute; |
| | background: rgba(0, 0, 0, 0.8); |
| | color: white; |
| | padding: 5px 10px; |
| | border-radius: 4px; |
| | font-size: 12px; |
| | z-index: 100; |
| | pointer-events: none; |
| | display: none; |
| | } |
| | |
| | .legend { |
| | display: flex; |
| | flex-wrap: wrap; |
| | gap: 15px; |
| | margin-top: 10px; |
| | } |
| | |
| | .legend-item { |
| | display: flex; |
| | align-items: center; |
| | font-size: 12px; |
| | } |
| | |
| | .legend-color { |
| | width: 12px; |
| | height: 12px; |
| | border-radius: 50%; |
| | margin-right: 5px; |
| | } |
| | |
| | .tabs { |
| | display: flex; |
| | margin-bottom: 15px; |
| | } |
| | |
| | .tab { |
| | padding: 8px 15px; |
| | background: #ddd; |
| | border: none; |
| | cursor: pointer; |
| | border-radius: 4px 4px 0 0; |
| | margin-right: 2px; |
| | } |
| | |
| | .tab.active { |
| | background: #f0f4f8; |
| | font-weight: bold; |
| | } |
| | |
| | .tab-content { |
| | display: none; |
| | background: #f0f4f8; |
| | padding: 15px; |
| | border-radius: 0 4px 4px 4px; |
| | } |
| | |
| | .tab-content.active { |
| | display: block; |
| | } |
| | |
| | table { |
| | width: 100%; |
| | border-collapse: collapse; |
| | margin: 15px 0; |
| | } |
| | |
| | table th, |
| | table td { |
| | border: 1px solid #ddd; |
| | padding: 8px; |
| | text-align: left; |
| | } |
| | |
| | table th { |
| | background-color: #f0f4f8; |
| | } |
| | |
| | tr:nth-child(even) { |
| | background-color: #f8f9fa; |
| | } |
| | |
| | .comparison-table { |
| | margin-top: 40px; |
| | } |
| | |
| | |
| | @media (max-width: 768px) { |
| | .search-type { |
| | flex: 1 1 100%; |
| | } |
| | |
| | .controls { |
| | flex-direction: column; |
| | } |
| | } |
| | </style> |
| | </head> |
| | <body> |
| | <h1>Vector Search Methods Comparison Simulation - By Pejman Ebrahimi</h1> |
| |
|
| | <div class="container"> |
| | |
| | <div class="search-type enn"> |
| | <div class="search-header">1. Exact Nearest Neighbor Search (ENN)</div> |
| | <div class="search-content"> |
| | <p> |
| | Finds the <strong>exact</strong> closest data points to a query by |
| | calculating distances to all vectors in the dataset. |
| | </p> |
| |
|
| | <div class="canvas-container"> |
| | <canvas id="ennCanvas" width="460" height="300"></canvas> |
| | <div id="ennTooltip" class="tooltip"></div> |
| | </div> |
| |
|
| | <div class="controls"> |
| | <div> |
| | <label for="ennDistance">Distance Metric:</label> |
| | <select id="ennDistance"> |
| | <option value="euclidean">Euclidean (L2)</option> |
| | <option value="manhattan">Manhattan (L1)</option> |
| | <option value="cosine">Cosine Similarity</option> |
| | </select> |
| | </div> |
| |
|
| | <div> |
| | <label for="ennStep">Step:</label> |
| | <select id="ennStep"> |
| | <option value="0">0. Data points</option> |
| | <option value="1">1. Calculate all distances</option> |
| | <option value="2">2. Sort by distance</option> |
| | <option value="3">3. Return nearest neighbors</option> |
| | </select> |
| | </div> |
| | </div> |
| |
|
| | <div class="step-display"> |
| | <div class="step-title" id="ennStepTitle">Step 0: Data points</div> |
| | <div class="step-description" id="ennStepDesc"> |
| | Initial dataset with vectors in feature space. The query point |
| | (red) will be compared against all data points. |
| | </div> |
| | </div> |
| |
|
| | <div class="legend"> |
| | <div class="legend-item"> |
| | <div class="legend-color" style="background: #3498db"></div> |
| | <span>Dataset Points</span> |
| | </div> |
| | <div class="legend-item"> |
| | <div class="legend-color" style="background: #e74c3c"></div> |
| | <span>Query Point</span> |
| | </div> |
| | <div class="legend-item"> |
| | <div class="legend-color" style="background: #2ecc71"></div> |
| | <span>Nearest Neighbor</span> |
| | </div> |
| | </div> |
| |
|
| | <h3>Key Features:</h3> |
| | <ul class="features"> |
| | <li>100% accuracy - finds the true nearest neighbors</li> |
| | <li> |
| | Computationally expensive for large datasets (O(n) complexity) |
| | </li> |
| | <li> |
| | Becomes inefficient in high dimensions (curse of dimensionality) |
| | </li> |
| | <li> |
| | Simple implementation - just calculate all distances and sort |
| | </li> |
| | </ul> |
| | </div> |
| | </div> |
| |
|
| | |
| | <div class="search-type ann"> |
| | <div class="search-header"> |
| | 2. Approximate Nearest Neighbor Search (ANN) |
| | </div> |
| | <div class="search-content"> |
| | <p> |
| | Sacrifices perfect accuracy for <strong>speed</strong> by using |
| | efficient data structures to approximate nearest neighbors. |
| | </p> |
| |
|
| | <div class="canvas-container"> |
| | <canvas id="annCanvas" width="460" height="300"></canvas> |
| | <div id="annTooltip" class="tooltip"></div> |
| | </div> |
| |
|
| | <div class="controls"> |
| | <div> |
| | <label for="annAlgorithm">Algorithm:</label> |
| | <select id="annAlgorithm"> |
| | <option value="hnsw">Hierarchical NSW</option> |
| | <option value="pq">Product Quantization</option> |
| | <option value="lsh">Locality-Sensitive Hashing</option> |
| | </select> |
| | </div> |
| |
|
| | <div> |
| | <label for="annStep">Step:</label> |
| | <select id="annStep"> |
| | <option value="0">0. Indexed structure</option> |
| | <option value="1">1. Navigate to region</option> |
| | <option value="2">2. Local search</option> |
| | <option value="3">3. Return approximate NN</option> |
| | </select> |
| | </div> |
| | </div> |
| |
|
| | <div class="step-display"> |
| | <div class="step-title" id="annStepTitle"> |
| | Step 0: Indexed structure |
| | </div> |
| | <div class="step-description" id="annStepDesc"> |
| | Data is pre-organized into efficient lookup structures that |
| | cluster or partition the vector space for faster searching. |
| | </div> |
| | </div> |
| |
|
| | <div class="legend"> |
| | <div class="legend-item"> |
| | <div class="legend-color" style="background: #3498db"></div> |
| | <span>Dataset Points</span> |
| | </div> |
| | <div class="legend-item"> |
| | <div class="legend-color" style="background: #e74c3c"></div> |
| | <span>Query Point</span> |
| | </div> |
| | <div class="legend-item"> |
| | <div class="legend-color" style="background: #f39c12"></div> |
| | <span>Search Region</span> |
| | </div> |
| | <div class="legend-item"> |
| | <div class="legend-color" style="background: #2ecc71"></div> |
| | <span>Returned Neighbors</span> |
| | </div> |
| | </div> |
| |
|
| | <h3>Key Features:</h3> |
| | <ul class="features"> |
| | <li> |
| | Much faster than ENN for large datasets (sub-linear time |
| | complexity) |
| | </li> |
| | <li>Trades accuracy for speed (95-99% accurate typically)</li> |
| | <li>Requires pre-processing to build index structures</li> |
| | <li>Various algorithms optimized for different use cases</li> |
| | </ul> |
| | </div> |
| | </div> |
| |
|
| | |
| | <div class="search-type semantic"> |
| | <div class="search-header">3. Semantic Search</div> |
| | <div class="search-content"> |
| | <p> |
| | Uses <strong>meaning</strong> of content rather than keywords by |
| | searching through dense embedding vectors that capture semantic |
| | relationships. |
| | </p> |
| |
|
| | <div class="canvas-container"> |
| | <canvas id="semanticCanvas" width="460" height="300"></canvas> |
| | <div id="semanticTooltip" class="tooltip"></div> |
| | </div> |
| |
|
| | <div class="controls"> |
| | <div> |
| | <label for="semanticModel">Embedding Model:</label> |
| | <select id="semanticModel"> |
| | <option value="bert">BERT</option> |
| | <option value="use">Universal Sentence Encoder</option> |
| | <option value="custom">Domain-Specific</option> |
| | </select> |
| | </div> |
| |
|
| | <div> |
| | <label for="semanticStep">Step:</label> |
| | <select id="semanticStep"> |
| | <option value="0">0. Text documents</option> |
| | <option value="1">1. Generate embeddings</option> |
| | <option value="2">2. Vector similarity search</option> |
| | <option value="3">3. Return relevant results</option> |
| | </select> |
| | </div> |
| | </div> |
| |
|
| | <div class="step-display"> |
| | <div class="step-title" id="semanticStepTitle"> |
| | Step 0: Text documents |
| | </div> |
| | <div class="step-description" id="semanticStepDesc"> |
| | Starting with raw text documents or queries before encoding into |
| | vector space. |
| | </div> |
| | </div> |
| |
|
| | <div class="legend"> |
| | <div class="legend-item"> |
| | <div class="legend-color" style="background: #3498db"></div> |
| | <span>Document Embeddings</span> |
| | </div> |
| | <div class="legend-item"> |
| | <div class="legend-color" style="background: #e74c3c"></div> |
| | <span>Query Embedding</span> |
| | </div> |
| | <div class="legend-item"> |
| | <div class="legend-color" style="background: #2ecc71"></div> |
| | <span>Semantic Matches</span> |
| | </div> |
| | </div> |
| |
|
| | <h3>Key Features:</h3> |
| | <ul class="features"> |
| | <li>Understands meaning beyond exact keyword matches</li> |
| | <li> |
| | Uses dense vector embeddings (typically 768-1536 dimensions) |
| | </li> |
| | <li>Trained on large text corpora to capture language patterns</li> |
| | <li> |
| | Effective for natural language, images, and multimodal content |
| | </li> |
| | <li>Usually implemented with ANN algorithms for efficiency</li> |
| | </ul> |
| | </div> |
| | </div> |
| |
|
| | |
| | <div class="search-type sparse"> |
| | <div class="search-header">4. Sparse Vector Search</div> |
| | <div class="search-content"> |
| | <p> |
| | Uses <strong>high-dimensional sparse vectors</strong> where most |
| | elements are zero, optimized for keyword and token matching. |
| | </p> |
| |
|
| | <div class="canvas-container"> |
| | <canvas id="sparseCanvas" width="460" height="300"></canvas> |
| | <div id="sparseTooltip" class="tooltip"></div> |
| | </div> |
| |
|
| | <div class="controls"> |
| | <div> |
| | <label for="sparseModel">Representation:</label> |
| | <select id="sparseModel"> |
| | <option value="tfidf">TF-IDF</option> |
| | <option value="bm25">BM25</option> |
| | <option value="hybrid">Hybrid (Sparse+Dense)</option> |
| | </select> |
| | </div> |
| |
|
| | <div> |
| | <label for="sparseStep">Step:</label> |
| | <select id="sparseStep"> |
| | <option value="0">0. Tokenized content</option> |
| | <option value="1">1. Create sparse vectors</option> |
| | <option value="2">2. Inverted index search</option> |
| | <option value="3">3. Return matches</option> |
| | </select> |
| | </div> |
| | </div> |
| |
|
| | <div class="step-display"> |
| | <div class="step-title" id="sparseStepTitle"> |
| | Step 0: Tokenized content |
| | </div> |
| | <div class="step-description" id="sparseStepDesc"> |
| | Documents broken down into tokens (words/terms) before converting |
| | to sparse vector representation. |
| | </div> |
| | </div> |
| |
|
| | <div class="legend"> |
| | <div class="legend-item"> |
| | <div class="legend-color" style="background: #3498db"></div> |
| | <span>Vocabulary Dimensions</span> |
| | </div> |
| | <div class="legend-item"> |
| | <div class="legend-color" style="background: #e74c3c"></div> |
| | <span>Query Terms</span> |
| | </div> |
| | <div class="legend-item"> |
| | <div class="legend-color" style="background: #2ecc71"></div> |
| | <span>Matching Terms</span> |
| | </div> |
| | </div> |
| |
|
| | <h3>Key Features:</h3> |
| | <ul class="features"> |
| | <li>Efficient for exact matching and keyword search</li> |
| | <li>Very high dimensionality (vocabulary size) but mostly zeros</li> |
| | <li>Uses specialized inverted index for quick lookup</li> |
| | <li>Good for precision when exact matches are required</li> |
| | <li>Often combined with semantic search for hybrid approaches</li> |
| | </ul> |
| | </div> |
| | </div> |
| | </div> |
| |
|
| | <div class="comparison-table"> |
| | <h2>Comparison of Vector Search Methods</h2> |
| | <table> |
| | <thead> |
| | <tr> |
| | <th>Feature</th> |
| | <th>Exact NN (ENN)</th> |
| | <th>Approximate NN (ANN)</th> |
| | <th>Semantic Search</th> |
| | <th>Sparse Vector Search</th> |
| | </tr> |
| | </thead> |
| | <tbody> |
| | <tr> |
| | <td>Accuracy</td> |
| | <td>100% exact</td> |
| | <td>High (95-99%)</td> |
| | <td>Context dependent</td> |
| | <td>High for exact matches</td> |
| | </tr> |
| | <tr> |
| | <td>Speed</td> |
| | <td>Slow (O(n))</td> |
| | <td>Fast (sub-linear)</td> |
| | <td>Moderate to fast</td> |
| | <td>Very fast for keywords</td> |
| | </tr> |
| | <tr> |
| | <td>Scalability</td> |
| | <td>Poor</td> |
| | <td>Good</td> |
| | <td>Good with ANN</td> |
| | <td>Excellent</td> |
| | </tr> |
| | <tr> |
| | <td>Vector Type</td> |
| | <td>Dense or Sparse</td> |
| | <td>Usually Dense</td> |
| | <td>Dense</td> |
| | <td>Sparse</td> |
| | </tr> |
| | <tr> |
| | <td>Use Cases</td> |
| | <td>Small datasets, high precision required</td> |
| | <td>Large-scale vector search, recommenders</td> |
| | <td>NLP, content discovery, similar item search</td> |
| | <td>Search engines, document retrieval</td> |
| | </tr> |
| | <tr> |
| | <td>Common Metrics</td> |
| | <td>Euclidean, Manhattan, Cosine</td> |
| | <td>Euclidean, Inner Product, Cosine</td> |
| | <td>Cosine, Dot Product</td> |
| | <td>Jaccard, BM25, TF-IDF</td> |
| | </tr> |
| | <tr> |
| | <td>Dimensions</td> |
| | <td>Any</td> |
| | <td>Moderate to high</td> |
| | <td>High (768-1536 typical)</td> |
| | <td>Very high (vocabulary size)</td> |
| | </tr> |
| | <tr> |
| | <td>Example Tools</td> |
| | <td>SciPy, NumPy</td> |
| | <td>FAISS, Annoy, hnswlib</td> |
| | <td>Pinecone, Weaviate, Milvus</td> |
| | <td>Elasticsearch, Lucene</td> |
| | </tr> |
| | </tbody> |
| | </table> |
| | </div> |
| |
|
| | <script> |
| | /* Sample 2-D points; renderENNSearch and renderANNSearch pass x/y straight to drawPoint as canvas coordinates. */ |
| | const dataPoints = [ |
| | { id: 1, x: 80, y: 70, label: "P1" }, |
| | { id: 2, x: 160, y: 120, label: "P2" }, |
| | { id: 3, x: 240, y: 60, label: "P3" }, |
| | { id: 4, x: 300, y: 180, label: "P4" }, |
| | { id: 5, x: 400, y: 90, label: "P5" }, |
| | { id: 6, x: 180, y: 220, label: "P6" }, |
| | { id: 7, x: 320, y: 260, label: "P7" }, |
| | { id: 8, x: 370, y: 150, label: "P8" }, |
| | { id: 9, x: 130, y: 180, label: "P9" }, |
| | ]; |
| | /* Query point drawn on the ENN and ANN canvases; renderENNSearch measures every data point against it. */ |
| | const queryPoint = { x: 220, y: 140, label: "Q" }; |
| | |
| | /* Documents for the semantic demo with literal 2-D embeddings; renderSemanticSearch scales the two components to canvas x/y. */ |
| | const semanticDocs = [ |
| | { id: 1, text: "How to train a dog", embedding: [0.2, 0.7] }, |
| | { id: 2, text: "Dog training techniques", embedding: [0.25, 0.65] }, |
| | { id: 3, text: "Cat behavior explained", embedding: [0.7, 0.3] }, |
| | { id: 4, text: "Pet care for beginners", embedding: [0.4, 0.5] }, |
| | { id: 5, text: "Feline health issues", embedding: [0.8, 0.2] }, |
| | { id: 6, text: "Training puppies at home", embedding: [0.15, 0.75] }, |
| | { id: 7, text: "Bird watching guide", embedding: [0.9, 0.7] }, |
| | { id: 8, text: "Exotic pet ownership", embedding: [0.6, 0.8] }, |
| | { id: 9, text: "Dog breeds comparison", embedding: [0.3, 0.6] }, |
| | ]; |
| | /* Query for the semantic demo; its embedding is compared to each document embedding with cosineSimilarity. */ |
| | const semanticQuery = { |
| | text: "How to train my puppy", |
| | embedding: [0.2, 0.8], |
| | }; |
| | |
| | /* Term list for the sparse demo. NOTE(review): each 10-entry vector below presumably holds one weight per term, in this order - confirm against drawSparseVectors. */ |
| | const vocabulary = [ |
| | "dog", |
| | "cat", |
| | "train", |
| | "pet", |
| | "health", |
| | "food", |
| | "guide", |
| | "home", |
| | "behavior", |
| | "puppy", |
| | ]; |
| | /* Documents for the sparse demo as 10-entry weight vectors; renderSparseSearch scores each one by its dot product with sparseQuery.vector. */ |
| | const sparseVectors = [ |
| | { |
| | id: 1, |
| | text: "Dog training guide", |
| | vector: [0.8, 0, 0.7, 0.1, 0, 0, 0.3, 0, 0, 0], |
| | }, |
| | { |
| | id: 2, |
| | text: "Cat health and food", |
| | vector: [0, 0.9, 0, 0.2, 0.7, 0.6, 0, 0, 0, 0], |
| | }, |
| | { |
| | id: 3, |
| | text: "Puppy behavior at home", |
| | vector: [0.3, 0, 0, 0, 0, 0, 0, 0.7, 0.8, 0.9], |
| | }, |
| | { |
| | id: 4, |
| | text: "Pet food guide", |
| | vector: [0, 0, 0, 0.7, 0, 0.8, 0.6, 0, 0, 0], |
| | }, |
| | { |
| | id: 5, |
| | text: "Cat and dog behavior", |
| | vector: [0.5, 0.5, 0, 0, 0, 0, 0, 0, 0.9, 0], |
| | }, |
| | { |
| | id: 6, |
| | text: "Training your puppy", |
| | vector: [0, 0, 0.8, 0, 0, 0, 0, 0, 0, 0.8], |
| | }, |
| | ]; |
| | /* Query for the sparse demo, expressed as a weight vector of the same length as the document vectors. */ |
| | const sparseQuery = { |
| | text: "dog training puppies", |
| | vector: [0.6, 0, 0.7, 0, 0, 0, 0, 0, 0, 0.5], |
| | }; |
| | |
| | |
/**
 * Straight-line (L2) distance between two 2-D points.
 *
 * Math.hypot is used instead of sqrt(dx^2 + dy^2) because squaring a very
 * large coordinate difference overflows to Infinity, whereas hypot keeps the
 * intermediate values in range.
 *
 * @param {{x: number, y: number}} p1 - First point.
 * @param {{x: number, y: number}} p2 - Second point.
 * @returns {number} Non-negative distance between p1 and p2.
 */
function euclideanDistance(p1, p2) {
  return Math.hypot(p1.x - p2.x, p1.y - p2.y);
}
| | |
/**
 * City-block (L1) distance between two 2-D points: the sum of the absolute
 * differences along each axis.
 *
 * @param {{x: number, y: number}} p1 - First point.
 * @param {{x: number, y: number}} p2 - Second point.
 * @returns {number} |p1.x - p2.x| + |p1.y - p2.y|.
 */
function manhattanDistance(p1, p2) {
  const horizontalGap = Math.abs(p1.x - p2.x);
  const verticalGap = Math.abs(p1.y - p2.y);
  return horizontalGap + verticalGap;
}
| | |
/**
 * Cosine distance (1 - cosine similarity) between two 2-D points, each
 * treated as a vector from the origin.
 *
 * @param {{x: number, y: number}} p1 - First point.
 * @param {{x: number, y: number}} p2 - Second point.
 * @returns {number} A value in [0, 2]: 0 for the same direction, 1 for
 *   perpendicular vectors, 2 for opposite directions. Returns 1 when either
 *   point sits on the origin.
 */
function cosineDistance(p1, p2) {
  const dotProduct = p1.x * p2.x + p1.y * p2.y;
  const mag1 = Math.sqrt(p1.x * p1.x + p1.y * p1.y);
  const mag2 = Math.sqrt(p2.x * p2.x + p2.y * p2.y);
  const magnitudeProduct = mag1 * mag2;

  // A zero-length vector has no direction, so the angle is undefined and the
  // division below would yield NaN. NaN breaks numeric sort comparators, so
  // report "no similarity" (distance 1) instead.
  if (magnitudeProduct === 0) {
    return 1;
  }

  return 1 - dotProduct / magnitudeProduct;
}
| | |
| | function cosineSimilarity(v1, v2) { |
| | let dotProduct = 0; |
| | let mag1 = 0; |
| | let mag2 = 0; |
| | |
| | for (let i = 0; i < v1.length; i++) { |
| | dotProduct += v1[i] * v2[i]; |
| | mag1 += v1[i] * v1[i]; |
| | mag2 += v2[i] * v2[i]; |
| | } |
| | |
| | mag1 = Math.sqrt(mag1); |
| | mag2 = Math.sqrt(mag2); |
| | |
| | return dotProduct / (mag1 * mag2); |
| | } |
| | |
| | /* ENN panel: canvas and its 2-D context, the metric and step dropdowns, the step caption elements, and the tooltip element. */ |
| | const ennCanvas = document.getElementById("ennCanvas"); |
| | const ennCtx = ennCanvas.getContext("2d"); |
| | const ennDistanceSelect = document.getElementById("ennDistance"); |
| | const ennStepSelect = document.getElementById("ennStep"); |
| | const ennStepTitle = document.getElementById("ennStepTitle"); |
| | const ennStepDesc = document.getElementById("ennStepDesc"); |
| | const ennTooltip = document.getElementById("ennTooltip"); |
| | |
| | /* ANN panel: same set of elements, with an algorithm dropdown in place of the metric dropdown. */ |
| | const annCanvas = document.getElementById("annCanvas"); |
| | const annCtx = annCanvas.getContext("2d"); |
| | const annAlgorithmSelect = document.getElementById("annAlgorithm"); |
| | const annStepSelect = document.getElementById("annStep"); |
| | const annStepTitle = document.getElementById("annStepTitle"); |
| | const annStepDesc = document.getElementById("annStepDesc"); |
| | const annTooltip = document.getElementById("annTooltip"); |
| | |
| | /* Semantic search panel: same set of elements, with an embedding-model dropdown. */ |
| | const semanticCanvas = document.getElementById("semanticCanvas"); |
| | const semanticCtx = semanticCanvas.getContext("2d"); |
| | const semanticModelSelect = document.getElementById("semanticModel"); |
| | const semanticStepSelect = document.getElementById("semanticStep"); |
| | const semanticStepTitle = document.getElementById("semanticStepTitle"); |
| | const semanticStepDesc = document.getElementById("semanticStepDesc"); |
| | const semanticTooltip = document.getElementById("semanticTooltip"); |
| | |
| | /* Sparse vector panel: same set of elements, with a representation dropdown. */ |
| | const sparseCanvas = document.getElementById("sparseCanvas"); |
| | const sparseCtx = sparseCanvas.getContext("2d"); |
| | const sparseModelSelect = document.getElementById("sparseModel"); |
| | const sparseStepSelect = document.getElementById("sparseStep"); |
| | const sparseStepTitle = document.getElementById("sparseStepTitle"); |
| | const sparseStepDesc = document.getElementById("sparseStepDesc"); |
| | const sparseTooltip = document.getElementById("sparseTooltip"); |
| | |
| | |
// Each panel has an option dropdown and a step dropdown; a change on either
// one redraws that panel. Registration order matches the panel order on the
// page (ENN, ANN, semantic, sparse).
for (const [optionSelect, stepSelect, render] of [
  [ennDistanceSelect, ennStepSelect, renderENNSearch],
  [annAlgorithmSelect, annStepSelect, renderANNSearch],
  [semanticModelSelect, semanticStepSelect, renderSemanticSearch],
  [sparseModelSelect, sparseStepSelect, renderSparseSearch],
]) {
  optionSelect.addEventListener("change", render);
  stepSelect.addEventListener("change", render);
}

// Draw every panel once so the canvases are populated before any interaction.
// The render functions are hoisted declarations, so calling them here, ahead
// of their definitions, is safe.
for (const render of [
  renderENNSearch,
  renderANNSearch,
  renderSemanticSearch,
  renderSparseSearch,
]) {
  render();
}
| | |
| | |
/**
 * Redraws the Exact Nearest Neighbor (ENN) canvas from the current dropdown
 * state.
 *
 * Reads the metric from `ennDistanceSelect` and the walkthrough step from
 * `ennStepSelect`, clears `ennCanvas`, then layers the drawing:
 *   step 0  - grid, every data point, and the query point;
 *   step 1+ - a dashed line and a distance label from the query to each point;
 *   step 2+ - a "Sorted by distance:" list of up to five closest points;
 *   step 3  - the closest point highlighted, plus a circle (Euclidean) or
 *             diamond (Manhattan) whose radius is the nearest distance, or an
 *             explanatory note (cosine).
 * Finally calls `updateENNStepInfo(step, distanceMetric)`.
 *
 * Takes no arguments and returns nothing; all inputs are module-level state.
 */
function renderENNSearch() {
  const distanceMetric = ennDistanceSelect.value;
  const step = Number.parseInt(ennStepSelect.value, 10);

  // An unrecognised metric falls back to Euclidean so that every point still
  // carries a numeric distance; otherwise toFixed() below throws on undefined.
  let measure = euclideanDistance;
  if (distanceMetric === "manhattan") {
    measure = manhattanDistance;
  } else if (distanceMetric === "cosine") {
    measure = cosineDistance;
  }

  // Cosine distance is 1 - cos(angle), so it stays within [0, 2]; with one
  // decimal almost every label collapses to "0.0". Pixel-based metrics keep
  // the original single decimal.
  const formatDistance = (value) =>
    value.toFixed(distanceMetric === "cosine" ? 3 : 1);

  ennCtx.clearRect(0, 0, ennCanvas.width, ennCanvas.height);
  drawGrid(ennCtx);

  const distances = dataPoints.map((point) => ({
    ...point,
    distance: measure(point, queryPoint),
  }));

  // Sort a copy: `distances` keeps dataset order for the step-1 labels.
  const sortedPoints = [...distances].sort((a, b) => a.distance - b.distance);

  dataPoints.forEach((point) => {
    drawPoint(ennCtx, point.x, point.y, "#3498db", point.label);
  });

  drawPoint(
    ennCtx,
    queryPoint.x,
    queryPoint.y,
    "#e74c3c",
    queryPoint.label,
    12
  );

  if (step >= 1) {
    distances.forEach((point) => {
      drawLine(
        ennCtx,
        queryPoint.x,
        queryPoint.y,
        point.x,
        point.y,
        "#aaa",
        [3, 3]
      );

      // Label each connector at its midpoint.
      const midX = (queryPoint.x + point.x) / 2;
      const midY = (queryPoint.y + point.y) / 2;
      ennCtx.fillStyle = "#555";
      ennCtx.font = "11px Arial";
      ennCtx.textAlign = "center";
      ennCtx.fillText(formatDistance(point.distance), midX, midY);
    });
  }

  if (step >= 2) {
    let yPos = 20;
    ennCtx.fillStyle = "#333";
    ennCtx.font = "12px Arial";
    ennCtx.textAlign = "left";
    ennCtx.fillText("Sorted by distance:", 10, yPos);

    sortedPoints.slice(0, 5).forEach((point, index) => {
      yPos += 15;
      ennCtx.fillText(
        `${index + 1}. ${point.label} (${formatDistance(point.distance)})`,
        15,
        yPos
      );
    });
  }

  // With an empty dataset there is no nearest neighbour to highlight.
  if (step >= 3 && sortedPoints.length > 0) {
    const nearest = sortedPoints[0];
    drawPoint(
      ennCtx,
      nearest.x,
      nearest.y,
      "#3498db",
      nearest.label,
      10,
      "#2ecc71",
      3
    );
    drawLine(
      ennCtx,
      queryPoint.x,
      queryPoint.y,
      nearest.x,
      nearest.y,
      "#2ecc71",
      [],
      2
    );

    if (distanceMetric === "euclidean" || distanceMetric === "manhattan") {
      // Outline the set of positions at exactly the nearest distance from the
      // query: a circle under L2, a diamond under L1.
      ennCtx.beginPath();
      if (distanceMetric === "euclidean") {
        ennCtx.arc(
          queryPoint.x,
          queryPoint.y,
          nearest.distance,
          0,
          Math.PI * 2
        );
      } else {
        ennCtx.moveTo(queryPoint.x, queryPoint.y - nearest.distance);
        ennCtx.lineTo(queryPoint.x + nearest.distance, queryPoint.y);
        ennCtx.lineTo(queryPoint.x, queryPoint.y + nearest.distance);
        ennCtx.lineTo(queryPoint.x - nearest.distance, queryPoint.y);
        ennCtx.closePath();
      }
      ennCtx.strokeStyle = "rgba(231, 76, 60, 0.4)";
      ennCtx.stroke();
      ennCtx.fillStyle = "rgba(231, 76, 60, 0.05)";
      ennCtx.fill();
    } else if (distanceMetric === "cosine") {
      // No pixel-radius shape applies to an angular metric, so print a note.
      ennCtx.fillStyle = "rgba(231, 76, 60, 0.7)";
      ennCtx.fillText(
        "Cosine similarity measures angle between vectors",
        250,
        30
      );
      ennCtx.fillText("smaller angle = more similar", 250, 45);
    }
  }

  updateENNStepInfo(step, distanceMetric);
}
| | |
| | |
/**
 * Redraws the Approximate Nearest Neighbor (ANN) canvas from the current
 * dropdown state.
 *
 * Clears `annCanvas`, draws the grid, every data point and the query point,
 * then hands off to the renderer for the algorithm chosen in
 * `annAlgorithmSelect` ("hnsw", "pq" or "lsh"), passing it the step from
 * `annStepSelect`. Any other algorithm value draws only the base layer.
 * Finally calls `updateANNStepInfo(step, algorithm)`.
 *
 * Takes no arguments and returns nothing; all inputs are module-level state.
 */
function renderANNSearch() {
  const algorithm = annAlgorithmSelect.value;
  // Explicit radix: the option values are plain decimal step indices.
  const step = Number.parseInt(annStepSelect.value, 10);

  annCtx.clearRect(0, 0, annCanvas.width, annCanvas.height);
  drawGrid(annCtx);

  dataPoints.forEach((point) => {
    drawPoint(annCtx, point.x, point.y, "#3498db", point.label);
  });

  drawPoint(
    annCtx,
    queryPoint.x,
    queryPoint.y,
    "#e74c3c",
    queryPoint.label,
    12
  );

  // The algorithm overlay is drawn last so it sits on top of the base points.
  switch (algorithm) {
    case "hnsw":
      renderHNSW(annCtx, step);
      break;
    case "pq":
      renderProductQuantization(annCtx, step);
      break;
    case "lsh":
      renderLSH(annCtx, step);
      break;
    default:
      break;
  }

  updateANNStepInfo(step, algorithm);
}
| | |
| | |
/**
 * Redraws the semantic search canvas from the current dropdown state.
 *
 *   step 0  - delegates to `drawTextDocuments` to show the raw texts;
 *   step 1+ - grid plus every document embedding and the query embedding,
 *             projected onto the canvas;
 *   step 2+ - the best matches by cosine similarity (at most three) connected
 *             to the query, highlighted and labelled with their score;
 *   step 3  - a "Top matches:" text list of those same documents.
 * Finally calls `updateSemanticStepInfo(step, model)`.
 *
 * Takes no arguments and returns nothing; all inputs are module-level state.
 */
function renderSemanticSearch() {
  const model = semanticModelSelect.value;
  const step = Number.parseInt(semanticStepSelect.value, 10);

  // Maps a 2-D embedding to canvas pixels. The second component is flipped
  // (1 - value) because canvas y grows downwards.
  const toCanvas = (embedding) => ({
    x: embedding[0] * 400 + 30,
    y: (1 - embedding[1]) * 250 + 20,
  });

  semanticCtx.clearRect(
    0,
    0,
    semanticCanvas.width,
    semanticCanvas.height
  );

  if (step === 0) {
    drawTextDocuments(semanticCtx, semanticDocs, semanticQuery);
  } else {
    drawGrid(semanticCtx);

    semanticDocs.forEach((doc) => {
      const position = toCanvas(doc.embedding);
      drawPoint(semanticCtx, position.x, position.y, "#3498db", `D${doc.id}`);
    });

    const queryPosition = toCanvas(semanticQuery.embedding);
    drawPoint(
      semanticCtx,
      queryPosition.x,
      queryPosition.y,
      "#e74c3c",
      "Q",
      12
    );

    if (step >= 2) {
      const similarities = semanticDocs
        .map((doc) => ({
          ...doc,
          similarity: cosineSimilarity(
            doc.embedding,
            semanticQuery.embedding
          ),
        }))
        .sort((a, b) => b.similarity - a.similarity);

      // slice() caps the list at the number of documents available, so a
      // corpus with fewer than three entries no longer dereferences undefined.
      const topMatches = similarities.slice(0, 3);

      topMatches.forEach((doc, rank) => {
        const position = toCanvas(doc.embedding);

        // Better-ranked matches get a thicker connector (3, 2, 1).
        const lineWidth = 3 - rank;
        drawLine(
          semanticCtx,
          queryPosition.x,
          queryPosition.y,
          position.x,
          position.y,
          "#2ecc71",
          [],
          lineWidth
        );

        drawPoint(
          semanticCtx,
          position.x,
          position.y,
          "#3498db",
          `D${doc.id}`,
          10,
          "#2ecc71",
          2
        );

        // Score label sits slightly above the connector's midpoint.
        const midX = (queryPosition.x + position.x) / 2;
        const midY = (queryPosition.y + position.y) / 2 - 10;
        semanticCtx.fillStyle = "#555";
        semanticCtx.font = "11px Arial";
        semanticCtx.textAlign = "center";
        semanticCtx.fillText(doc.similarity.toFixed(2), midX, midY);
      });

      if (step >= 3) {
        let yPos = 20;
        semanticCtx.fillStyle = "#333";
        semanticCtx.font = "12px Arial";
        semanticCtx.textAlign = "left";
        semanticCtx.fillText("Top matches:", 10, yPos);

        topMatches.forEach((doc) => {
          yPos += 15;
          semanticCtx.fillText(
            `${doc.text} (${doc.similarity.toFixed(2)})`,
            15,
            yPos
          );
        });
      }
    }
  }

  updateSemanticStepInfo(step, model);
}
| | |
| | |
/**
 * Redraws the sparse vector search canvas from the current dropdown state.
 *
 *   step 0  - delegates to `drawTokenizedDocuments`;
 *   step 1+ - delegates to `drawSparseVectors` with the step and the selected
 *             representation;
 *   step 3  - additionally lists up to three documents with the highest
 *             dot-product score against `sparseQuery.vector`.
 * Finally calls `updateSparseStepInfo(step, model)`.
 *
 * Takes no arguments and returns nothing; all inputs are module-level state.
 */
function renderSparseSearch() {
  const model = sparseModelSelect.value;
  const step = Number.parseInt(sparseStepSelect.value, 10);

  sparseCtx.clearRect(0, 0, sparseCanvas.width, sparseCanvas.height);

  if (step === 0) {
    drawTokenizedDocuments(sparseCtx, sparseVectors, sparseQuery);
  } else {
    drawSparseVectors(sparseCtx, sparseVectors, sparseQuery, step, model);

    // Scores are only displayed at step 3, so they are only computed there;
    // previously they were also computed (and discarded) at step 2.
    if (step >= 3) {
      const matches = sparseVectors
        .map((doc) => {
          // Dot product of the document and query weight vectors.
          const score = doc.vector.reduce(
            (sum, weight, index) => sum + weight * sparseQuery.vector[index],
            0
          );
          return { ...doc, score };
        })
        .sort((a, b) => b.score - a.score);

      let yPos = 20;
      sparseCtx.fillStyle = "#333";
      sparseCtx.font = "12px Arial";
      sparseCtx.textAlign = "left";
      sparseCtx.fillText("Top matches:", 300, yPos);

      matches.slice(0, 3).forEach((match) => {
        yPos += 15;
        sparseCtx.fillText(
          `${match.text} (${match.score.toFixed(2)})`,
          300,
          yPos
        );
      });
    }
  }

  updateSparseStepInfo(step, model);
}
| | |
| | |
/**
 * Draws the HNSW overlay on top of the base ANN scene.
 *
 * The overlay is a fixed illustration built from hard-coded `dataPoints`
 * indices; no real graph search is performed here.
 *   step 1+ - dashed links between the three top-layer nodes;
 *   step 2+ - solid links among five mid-layer nodes (up to three per node),
 *             plus the entry node highlighted and linked to the query;
 *   step 3  - the traversal path in red, visited nodes ringed in the
 *             search-region colour, and the returned neighbour in green.
 * Steps below 1 (or a NaN step) draw nothing.
 *
 * @param {CanvasRenderingContext2D} ctx - Context of the ANN canvas.
 * @param {number} step - Walkthrough step selected in the UI.
 */
function renderHNSW(ctx, step) {
  const pointColor = "#3498db";
  const regionColor = "#f39c12";
  const pathColor = "#e74c3c";
  const resultColor = "#2ecc71";

  // Negated >= rather than `step < 1` so that NaN also exits early.
  if (!(step >= 1)) {
    return;
  }

  ctx.strokeStyle = regionColor;
  ctx.lineWidth = 1;

  const topLayer = [dataPoints[2], dataPoints[4], dataPoints[7]];
  topLayer.forEach((from, i) => {
    topLayer.forEach((to, j) => {
      if (i !== j) {
        drawLine(ctx, from.x, from.y, to.x, to.y, regionColor, [2, 2], 1);
      }
    });
  });

  if (step >= 2) {
    const midLayer = [
      dataPoints[1],
      dataPoints[2],
      dataPoints[4],
      dataPoints[6],
      dataPoints[7],
    ];
    midLayer.forEach((from, i) => {
      // Each node links to the first three other nodes in list order.
      let connections = 0;
      midLayer.forEach((to, j) => {
        if (i !== j && connections < 3) {
          drawLine(ctx, from.x, from.y, to.x, to.y, regionColor, [], 1);
          connections++;
        }
      });
    });

    const entryPoint = dataPoints[4];
    drawPoint(
      ctx,
      entryPoint.x,
      entryPoint.y,
      pointColor,
      entryPoint.label,
      10,
      regionColor,
      2
    );
    drawLine(
      ctx,
      queryPoint.x,
      queryPoint.y,
      entryPoint.x,
      entryPoint.y,
      regionColor,
      [],
      2
    );
  }

  if (step >= 3) {
    const searchPath = [
      dataPoints[4],
      dataPoints[7],
      dataPoints[6],
      dataPoints[2],
    ];

    for (let i = 0; i < searchPath.length - 1; i++) {
      const from = searchPath[i];
      const to = searchPath[i + 1];
      drawLine(ctx, from.x, from.y, to.x, to.y, pathColor, [], 2);

      // Ring the first two hops. The ring colour was previously the typo
      // "#f39c3c"; it now matches the search-region colour used by the entry
      // point and the legend.
      if (i < searchPath.length - 2) {
        drawPoint(
          ctx,
          from.x,
          from.y,
          pointColor,
          from.label,
          10,
          regionColor,
          2
        );
      }
    }

    const nearest = dataPoints[2];
    drawPoint(
      ctx,
      nearest.x,
      nearest.y,
      pointColor,
      nearest.label,
      10,
      resultColor,
      3
    );
    drawLine(
      ctx,
      queryPoint.x,
      queryPoint.y,
      nearest.x,
      nearest.y,
      resultColor,
      [],
      2
    );
  }
}
| | |
/**
 * Classifies a point into one of the four quadrant regions of a canvas.
 *
 * @param {{x: number, y: number}} point - Point in canvas coordinates.
 * @param {{width: number, height: number}} canvas - Canvas whose midlines
 *   split the regions.
 * @returns {number} 1 = top-left, 2 = top-right, 3 = bottom-left,
 *   4 = bottom-right. Points exactly on a midline fall to the right/bottom.
 */
function pqRegionOf(point, canvas) {
  const isLeft = point.x < canvas.width / 2;
  const isTop = point.y < canvas.height / 2;
  if (isLeft) {
    return isTop ? 1 : 3;
  }
  return isTop ? 2 : 4;
}

/**
 * Draws the staged Product Quantization illustration on the given context.
 *
 * The canvas is split into four quadrant regions. From step 2 the query's
 * region is shaded and its candidates are linked to the query; from step 3
 * the best candidate inside that region is compared with the true nearest
 * neighbour over all of `dataPoints`.
 *
 * Reads the file-level `dataPoints` and `queryPoint` and uses `drawLine`,
 * `drawPoint` and `euclideanDistance`. Dimensions are taken from
 * `ctx.canvas` (the canvas actually being drawn on) rather than from an
 * unrelated global canvas.
 *
 * @param {CanvasRenderingContext2D} ctx - Context to draw on.
 * @param {number} step - Current animation step; nothing is drawn below 1.
 */
function renderProductQuantization(ctx, step) {
  if (!(step >= 1)) {
    return;
  }

  const canvas = ctx.canvas;
  const halfWidth = canvas.width / 2;
  const halfHeight = canvas.height / 2;

  // Step 1: partition lines.
  ctx.strokeStyle = "#f39c12";
  ctx.lineWidth = 2;
  ctx.setLineDash([]);

  ctx.beginPath();
  ctx.moveTo(halfWidth, 0);
  ctx.lineTo(halfWidth, canvas.height);
  ctx.stroke();

  ctx.beginPath();
  ctx.moveTo(0, halfHeight);
  ctx.lineTo(canvas.width, halfHeight);
  ctx.stroke();

  // Region labels, centred in each quadrant.
  ctx.fillStyle = "#f39c12";
  ctx.font = "12px Arial";
  ctx.textAlign = "center";
  ctx.fillText("Region 1", canvas.width / 4, canvas.height / 4);
  ctx.fillText("Region 2", (3 * canvas.width) / 4, canvas.height / 4);
  ctx.fillText("Region 3", canvas.width / 4, (3 * canvas.height) / 4);
  ctx.fillText("Region 4", (3 * canvas.width) / 4, (3 * canvas.height) / 4);

  if (!(step >= 2)) {
    return;
  }

  // Step 2: shade the query's region and link its candidates to the query.
  const queryRegion = pqRegionOf(queryPoint, canvas);
  const isInQueryRegion = (point) => pqRegionOf(point, canvas) === queryRegion;

  ctx.fillStyle = "rgba(243, 156, 18, 0.1)";
  const regionLeft = queryRegion === 2 || queryRegion === 4 ? halfWidth : 0;
  const regionTop = queryRegion === 3 || queryRegion === 4 ? halfHeight : 0;
  ctx.fillRect(regionLeft, regionTop, halfWidth, halfHeight);

  dataPoints.filter(isInQueryRegion).forEach((point) => {
    drawLine(
      ctx,
      queryPoint.x,
      queryPoint.y,
      point.x,
      point.y,
      "#aaa",
      [3, 3]
    );
  });

  if (!(step >= 3)) {
    return;
  }

  // Step 3: rank every point by distance. The sort is stable, so the first
  // in-region entry is the same point the per-region sort would have chosen.
  const byDistance = dataPoints
    .map((point) => ({
      ...point,
      distance: euclideanDistance(point, queryPoint),
    }))
    .sort((a, b) => a.distance - b.distance);

  const trueNearest = byDistance[0];
  if (!trueNearest) {
    // No data points at all: there is nothing to compare.
    return;
  }

  // May be undefined when the query's region contains no data point.
  const nearest = byDistance.find(isInQueryRegion);

  if (nearest) {
    drawPoint(
      ctx,
      nearest.x,
      nearest.y,
      "#3498db",
      nearest.label,
      10,
      "#2ecc71",
      3
    );
    drawLine(
      ctx,
      queryPoint.x,
      queryPoint.y,
      nearest.x,
      nearest.y,
      "#2ecc71",
      [],
      2
    );
  }

  if (!nearest || nearest.id !== trueNearest.id) {
    // The region-limited search missed (or had no candidate for) the real
    // nearest neighbour: show it in red.
    drawPoint(
      ctx,
      trueNearest.x,
      trueNearest.y,
      "#3498db",
      trueNearest.label,
      10,
      "#e74c3c",
      2
    );
    drawLine(
      ctx,
      queryPoint.x,
      queryPoint.y,
      trueNearest.x,
      trueNearest.y,
      "#e74c3c",
      [5, 5],
      1
    );

    ctx.fillStyle = "#e74c3c";
    ctx.font = "12px Arial";
    ctx.textAlign = "left";
    ctx.fillText("Approximation error", 10, 20);
    ctx.fillText(`True nearest: ${trueNearest.label}`, 10, 35);
  } else {
    ctx.fillStyle = "#2ecc71";
    ctx.font = "12px Arial";
    ctx.textAlign = "left";
    ctx.fillText("Correct match", 10, 20);
  }
}
| | |
| | |
/**
 * Paints a light background grid across the context's canvas.
 *
 * Lines are spaced 40 units apart, starting at 0 on both axes, and span the
 * full width/height reported by `ctx.canvas`.
 *
 * @param {CanvasRenderingContext2D} ctx - Context to draw on.
 */
function drawGrid(ctx) {
  const spacing = 40;

  // Strokes one straight segment as its own path.
  const strokeSegment = (fromX, fromY, toX, toY) => {
    ctx.beginPath();
    ctx.moveTo(fromX, fromY);
    ctx.lineTo(toX, toY);
    ctx.stroke();
  };

  ctx.strokeStyle = "#e0e0e0";
  ctx.lineWidth = 0.5;

  // Vertical lines.
  let column = 0;
  while (column < ctx.canvas.width) {
    strokeSegment(column, 0, column, ctx.canvas.height);
    column += spacing;
  }

  // Horizontal lines.
  let row = 0;
  while (row < ctx.canvas.height) {
    strokeSegment(0, row, ctx.canvas.width, row);
    row += spacing;
  }
}
| | |
/**
 * Draws a filled, outlined circle with a text label centred above it.
 *
 * @param {CanvasRenderingContext2D} ctx - Context to draw on.
 * @param {number} x - Centre x coordinate.
 * @param {number} y - Centre y coordinate.
 * @param {string} color - Fill colour of the circle.
 * @param {string} label - Text drawn 5 units above the circle's top edge.
 * @param {number} [radius=8] - Circle radius.
 * @param {string} [strokeColor="#333"] - Outline colour.
 * @param {number} [strokeWidth=1] - Outline width.
 */
function drawPoint(ctx, x, y, color, label, radius = 8, strokeColor = "#333", strokeWidth = 1) {
  const fullCircle = 2 * Math.PI;
  const labelBaseline = y - radius - 5;

  // Circle body.
  ctx.beginPath();
  ctx.arc(x, y, radius, 0, fullCircle);
  ctx.fillStyle = color;
  ctx.fill();

  // Outline.
  Object.assign(ctx, { strokeStyle: strokeColor, lineWidth: strokeWidth });
  ctx.stroke();

  // Label.
  Object.assign(ctx, {
    fillStyle: "#333",
    font: "12px Arial",
    textAlign: "center",
  });
  ctx.fillText(label, x, labelBaseline);
}
| | |
/**
 * Strokes a straight segment between two points.
 *
 * The dash pattern is applied only for this segment; it is reset to a solid
 * line afterwards. Stroke colour and width are left as set by this call.
 *
 * @param {CanvasRenderingContext2D} ctx - Context to draw on.
 * @param {number} x1 - Start x.
 * @param {number} y1 - Start y.
 * @param {number} x2 - End x.
 * @param {number} y2 - End y.
 * @param {string} [color="#333"] - Stroke colour.
 * @param {number[]} [dash=[]] - Dash pattern passed to `setLineDash`.
 * @param {number} [width=1] - Stroke width.
 */
function drawLine(ctx, x1, y1, x2, y2, color = "#333", dash = [], width = 1) {
  const solid = [];

  ctx.beginPath();
  ctx.setLineDash(dash);
  Object.assign(ctx, { strokeStyle: color, lineWidth: width });

  ctx.moveTo(x1, y1);
  ctx.lineTo(x2, y2);
  ctx.stroke();

  // Do not leak the dash pattern into later drawing calls.
  ctx.setLineDash(solid);
}
| | |
/**
 * Renders the "step 0" view of the semantic search demo: a heading, up to
 * five raw documents, the query text, and a two-line explanatory note.
 *
 * @param {CanvasRenderingContext2D} ctx - Context to draw on.
 * @param {Array<{id: *, text: string}>} docs - Documents; only the first
 *   five are listed.
 * @param {{text: string}} query - Query whose text is shown in quotes.
 */
function drawTextDocuments(ctx, docs, query) {
  const leftMargin = 20;
  const rowHeight = 25;

  Object.assign(ctx, {
    fillStyle: "#333",
    font: "14px Arial",
    textAlign: "left",
  });
  ctx.fillText("Original Text Documents:", leftMargin, 30);

  // One row per document; the reducer carries the next free baseline.
  const afterDocsY = docs.slice(0, 5).reduce((rowY, doc) => {
    ctx.fillStyle = "#3498db";
    ctx.fillText(`D${doc.id}: ${doc.text}`, leftMargin, rowY);
    return rowY + rowHeight;
  }, 60);

  const queryY = afterDocsY + 20;
  ctx.fillStyle = "#e74c3c";
  ctx.fillText(`Query: "${query.text}"`, leftMargin, queryY);

  const noteY = queryY + 40;
  ctx.fillStyle = "#333";
  ctx.fillText(
    "Step 1: These documents will be converted to vector embeddings",
    leftMargin,
    noteY
  );
  ctx.fillText("that capture their semantic meaning.", leftMargin, noteY + 20);
}
| | |
/**
 * Highlights, on an already drawn line of text, the first occurrence of each
 * vocabulary term that has a positive weight in `vector`.
 *
 * Matching is case-insensitive, but widths are measured from the characters
 * actually drawn, so capitalised words get a correctly sized highlight.
 *
 * @param {CanvasRenderingContext2D} ctx - Context to draw on; its current
 *   font must be the one the text was drawn with.
 * @param {string} text - The text that was drawn after `labelPrefix`.
 * @param {number[]} vector - Term weights, indexed like `vocabulary`.
 * @param {string} labelPrefix - Text drawn before `text` on the same line.
 * @param {number} baselineY - Baseline y of the drawn line.
 * @param {string} color - Fill colour of the highlight rectangle.
 */
function highlightVocabularyTerms(ctx, text, vector, labelPrefix, baselineY, color) {
  const lowered = text.toLowerCase();
  const textStartX = 20 + ctx.measureText(labelPrefix).width;

  vocabulary.forEach((term, i) => {
    if (!(vector[i] > 0)) {
      return;
    }
    const termStart = lowered.indexOf(term);
    if (termStart === -1) {
      return;
    }

    const prefixWidth = ctx.measureText(text.substring(0, termStart)).width;
    // Measure the drawn characters, not the lowercase vocabulary entry.
    const drawnTerm = text.substring(termStart, termStart + term.length);
    const termWidth = ctx.measureText(drawnTerm).width;

    ctx.fillStyle = color;
    ctx.fillRect(textStartX + prefixWidth, baselineY - 12, termWidth, 15);
  });
}

/**
 * Renders the "step 0" view of the sparse search demo: a heading, the
 * vocabulary line, up to five documents and the query, with vocabulary
 * terms highlighted wherever the matching vector entry is positive.
 *
 * Reads the file-level `vocabulary` array.
 *
 * @param {CanvasRenderingContext2D} ctx - Context to draw on.
 * @param {Array<{id: *, text: string, vector: number[]}>} docs - Documents;
 *   only the first five are listed.
 * @param {{text: string, vector: number[]}} query - Query to display.
 */
function drawTokenizedDocuments(ctx, docs, query) {
  ctx.fillStyle = "#333";
  ctx.font = "14px Arial";
  ctx.textAlign = "left";

  ctx.fillText("Tokenized Documents:", 20, 30);
  ctx.fillText(
    "Vocabulary: dog, cat, train, pet, health, food, guide, home, behavior, puppy",
    20,
    50
  );

  // Documents, one row each, highlighted in green.
  let y = 80;
  docs.slice(0, 5).forEach((doc) => {
    const label = `D${doc.id}: `;
    ctx.fillStyle = "#3498db";
    ctx.fillText(`${label}${doc.text}`, 20, y);
    highlightVocabularyTerms(
      ctx,
      doc.text,
      doc.vector,
      label,
      y,
      "rgba(46, 204, 113, 0.3)"
    );
    y += 25;
  });

  // Query row, highlighted in red.
  y += 20;
  ctx.fillStyle = "#e74c3c";
  ctx.fillText(`Query: "${query.text}"`, 20, y);
  highlightVocabularyTerms(
    ctx,
    query.text,
    query.vector,
    'Query: "',
    y,
    "rgba(231, 76, 60, 0.3)"
  );
}
| | |
/**
 * Draws the sparse-vector bar chart for the sparse search demo.
 *
 * From step 1 it draws the vocabulary axis, the query vector as tall bars
 * and as a row of mini bars. From step 2 it additionally draws the mini bars
 * of the document with `id === 1`, outlines the terms shared with the
 * query, and prints their dot product.
 *
 * Reads the file-level `vocabulary` array and uses `drawLine`.
 *
 * @param {CanvasRenderingContext2D} ctx - Context to draw on.
 * @param {Array<{id: *, text: string, vector: number[]}>} docs - Documents.
 * @param {{vector: number[]}} query - Query whose weights are charted.
 * @param {number} step - Current animation step; nothing is drawn below 1.
 * @param {*} model - Accepted for signature parity with the caller; not used.
 */
function drawSparseVectors(ctx, docs, query, step, model) {
  if (!(step >= 1)) {
    return;
  }

  const barWidth = 15;
  const barSpacing = 5;
  const startX = 40;
  const startY = 220;
  const maxBarHeight = 100;
  const miniBarScale = 20;
  const queryRowY = 50;
  const docRowY = 110;
  const barLeft = (i) => startX + i * (barWidth + barSpacing);

  // Vocabulary axis labels.
  ctx.fillStyle = "#333";
  ctx.font = "10px Arial";
  ctx.textAlign = "center";
  vocabulary.forEach((word, i) => {
    ctx.fillText(word, barLeft(i) + barWidth / 2, startY + 15);
  });

  // Axis titles; the vertical one is drawn on a rotated coordinate system.
  ctx.fillText("Vocabulary Terms", 230, startY + 30);
  ctx.save();
  ctx.translate(15, 150);
  ctx.rotate(-Math.PI / 2);
  ctx.fillText("Term Weight", 0, 0);
  ctx.restore();

  ctx.fillStyle = "#333";
  ctx.font = "12px Arial";
  ctx.textAlign = "left";
  ctx.fillText("Query vector:", 20, 40);

  query.vector.forEach((value, i) => {
    const x = barLeft(i);
    const barHeight = value * maxBarHeight;
    const barColor = value > 0 ? "#e74c3c" : "#f8f9fa";

    // Tall bar growing upwards from the axis.
    ctx.fillStyle = barColor;
    ctx.fillRect(x, startY - barHeight, barWidth, barHeight);

    if (value > 0) {
      ctx.fillStyle = "#fff";
      ctx.textAlign = "center";
      ctx.font = "9px Arial";
      ctx.fillText(value.toFixed(1), x + barWidth / 2, startY - barHeight / 2);
    }

    // Mini bar in the query row, used for the comparison in step 2.
    ctx.fillStyle = barColor;
    ctx.fillRect(x, queryRowY, barWidth, value * miniBarScale);
  });

  if (!(step >= 2)) {
    return;
  }

  const matchingDoc = docs.find((d) => d.id === 1);
  if (!matchingDoc) {
    // Without the showcased document there is nothing to compare against;
    // skip the comparison instead of throwing on `matchingDoc.text`.
    return;
  }

  ctx.fillStyle = "#333";
  ctx.font = "12px Arial";
  ctx.textAlign = "left";
  ctx.fillText(`Document: "${matchingDoc.text}"`, 20, 100);

  matchingDoc.vector.forEach((value, i) => {
    const x = barLeft(i);
    const miniHeight = value * miniBarScale;

    ctx.fillStyle = value > 0 ? "#3498db" : "#f8f9fa";
    ctx.fillRect(x, docRowY, barWidth, miniHeight);

    // Terms present in both vectors: outline both mini bars and join them.
    if (value > 0 && query.vector[i] > 0) {
      const queryMiniHeight = query.vector[i] * miniBarScale;
      ctx.strokeStyle = "#2ecc71";
      ctx.lineWidth = 2;
      ctx.strokeRect(x, queryRowY, barWidth, queryMiniHeight);
      ctx.strokeRect(x, docRowY, barWidth, miniHeight);

      drawLine(
        ctx,
        x + barWidth / 2,
        queryRowY + queryMiniHeight,
        x + barWidth / 2,
        docRowY,
        "#2ecc71",
        [],
        1
      );
    }
  });

  // Relevance score: dot product of the query and document vectors.
  let dotProduct = 0;
  for (let i = 0; i < query.vector.length; i++) {
    dotProduct += query.vector[i] * matchingDoc.vector[i];
  }

  ctx.fillStyle = "#333";
  ctx.font = "12px Arial";
  ctx.textAlign = "left";
  ctx.fillText(`Matching score: ${dotProduct.toFixed(2)}`, 320, 100);
}
| | |
| | |
/**
 * Updates the exact-nearest-neighbour panel's step title and description.
 *
 * Writes to the file-level `ennStepTitle` and `ennStepDesc` elements. A step
 * outside 0-3 assigns `undefined` to both.
 *
 * @param {number} step - Current animation step.
 * @param {string} distanceMetric - "euclidean", "manhattan", or anything
 *   else (described as cosine similarity); only affects step 1.
 */
function updateENNStepInfo(step, distanceMetric) {
  let distanceText;
  if (distanceMetric === "euclidean") {
    distanceText =
      "Calculate Euclidean (L2) distance between query and every data point: d = √((x₂-x₁)² + (y₂-y₁)²).";
  } else if (distanceMetric === "manhattan") {
    distanceText =
      "Calculate Manhattan (L1) distance between query and every data point: d = |x₂-x₁| + |y₂-y₁|.";
  } else {
    distanceText =
      "Calculate Cosine similarity between query and data points: similarity = cos(θ) between vectors.";
  }

  // Map lookup keeps the strict numeric matching a `switch` would give.
  const stages = new Map([
    [
      0,
      [
        "Step 0: Data points",
        "Initial dataset with vectors in feature space. The query point (red) will be compared against all data points.",
      ],
    ],
    [1, ["Step 1: Calculate all distances", distanceText]],
    [
      2,
      [
        "Step 2: Sort by distance",
        "Sort all data points by their distance to query point (ascending order for distance, descending for similarity).",
      ],
    ],
    [
      3,
      [
        "Step 3: Return nearest neighbors",
        "Return the k closest data points (here k=1). This approach guarantees finding the exact nearest neighbor.",
      ],
    ],
  ]);

  const [title, description] = stages.get(step) || [];
  ennStepTitle.textContent = title;
  ennStepDesc.textContent = description;
}
| | |
/**
 * Updates the approximate-nearest-neighbour panel's step title and
 * description.
 *
 * Writes to the file-level `annStepTitle` and `annStepDesc` elements. A step
 * outside 0-3 assigns `undefined` to both.
 *
 * @param {number} step - Current animation step.
 * @param {string} algorithm - "hnsw", "pq", or anything else (described as
 *   Locality-Sensitive Hashing).
 */
function updateANNStepInfo(step, algorithm) {
  // Column of each stage's `texts` to show: 0 = HNSW, 1 = PQ, 2 = fallback.
  const variant = algorithm === "hnsw" ? 0 : algorithm === "pq" ? 1 : 2;

  const stages = new Map([
    [
      0,
      {
        title: "Step 0: Indexed structure",
        texts: [
          "HNSW pre-organizes vectors into a navigable small world graph with multiple layers for efficient search.",
          "Product Quantization divides the vector space into smaller subspaces and quantizes each dimension group.",
          "Locality-Sensitive Hashing uses hash functions that map similar vectors to the same buckets.",
        ],
      },
    ],
    [
      1,
      {
        title: "Step 1: Navigate to region",
        texts: [
          "Search begins at a random entry point in the top layer (sparse connections).",
          "The query is mapped to specific regions in each subspace based on quantized centroids.",
          "Query vector is hashed to identify which bucket(s) to search.",
        ],
      },
    ],
    [
      2,
      {
        title: "Step 2: Local search",
        texts: [
          "Navigate through connections to find closer and closer neighbors, descending through layers.",
          "Compare only with points in the same or nearby quantized regions to limit search space.",
          "Only compute distances for vectors in the same hash bucket, dramatically reducing comparisons.",
        ],
      },
    ],
    [
      3,
      {
        title: "Step 3: Return approximate NN",
        texts: [
          "Return the closest point found. May not be the true nearest neighbor, but usually very close.",
          "Approximates distances between query and dataset points. Fast but loses some precision.",
          "If points fall into different buckets, LSH might miss true nearest neighbors (accuracy vs. speed tradeoff).",
        ],
      },
    ],
  ]);

  const stage = stages.get(step);
  annStepTitle.textContent = stage ? stage.title : undefined;
  annStepDesc.textContent = stage ? stage.texts[variant] : undefined;
}
| | |
/**
 * Updates the semantic-search panel's step title and description.
 *
 * Writes to the file-level `semanticStepTitle` and `semanticStepDesc`
 * elements. A step outside 0-3 assigns `undefined` to both.
 *
 * @param {number} step - Current animation step.
 * @param {string} model - "bert", "use", or anything else (described as
 *   domain-specific embeddings); only affects step 1.
 */
function updateSemanticStepInfo(step, model) {
  // Single place where the panel is written.
  const show = (title, description) => {
    semanticStepTitle.textContent = title;
    semanticStepDesc.textContent = description;
  };

  if (step === 0) {
    show(
      "Step 0: Text documents",
      "Raw text data before encoding into vector space."
    );
    return;
  }

  if (step === 1) {
    const embeddingText =
      model === "bert"
        ? "BERT creates dense vector embeddings (768 dimensions) that capture semantic meaning of text."
        : model === "use"
        ? "Universal Sentence Encoder maps sentences to 512-dimensional vectors that capture meaning."
        : "Domain-specific embeddings capture meaning relevant to particular fields or applications.";
    show("Step 1: Generate embeddings", embeddingText);
    return;
  }

  if (step === 2) {
    show(
      "Step 2: Vector similarity search",
      "Calculate similarity (usually cosine) between query vector and document vectors."
    );
    return;
  }

  if (step === 3) {
    show(
      "Step 3: Return relevant results",
      "Rank documents by similarity and return the most relevant. Results include semantic matches, not just exact keyword matches."
    );
    return;
  }

  // Unknown step: both fields receive undefined.
  show(undefined, undefined);
}
| | |
/**
 * Updates the sparse-search panel's step title and description.
 *
 * Writes to the file-level `sparseStepTitle` and `sparseStepDesc` elements.
 * A step outside 0-3 assigns `undefined` to both.
 *
 * @param {number} step - Current animation step.
 * @param {string} model - "tfidf", "bm25", or anything else (described as a
 *   hybrid representation); only affects step 1.
 */
function updateSparseStepInfo(step, model) {
  const vectorText =
    model === "tfidf"
      ? "TF-IDF weights tokens based on term frequency and inverse document frequency to emphasize distinctive terms."
      : model === "bm25"
      ? "BM25 extends TF-IDF with better term saturation and document length normalization."
      : "Hybrid representations combine sparse (keyword) and dense (semantic) vectors for better retrieval.";

  // Indexed by step number.
  const stages = [
    {
      title: "Step 0: Tokenized content",
      description:
        "Documents broken down into tokens (words/terms) before converting to sparse vector representation.",
    },
    {
      title: "Step 1: Create sparse vectors",
      description: vectorText,
    },
    {
      title: "Step 2: Inverted index search",
      description:
        "Lookup only the specific terms present in the query, accessing posting lists through an inverted index.",
    },
    {
      title: "Step 3: Return matches",
      description:
        "Return documents with matching terms, ranked by relevance score. Very efficient for exact term matches.",
    },
  ];

  // Only real numbers select a stage, so "1" or "length" never match.
  const stage = typeof step === "number" ? stages[step] : undefined;
  const { title, description } = stage || {};

  sparseStepTitle.textContent = title;
  sparseStepDesc.textContent = description;
}
| | </script> |
| | </body> |
| | </html> |
| |
|