davanstrien
HF Staff
Update findSimilarFromResult function and improve dataset suggestions handling
6e41f47
| <html lang="en"> | |
| <head> | |
| <meta charset="UTF-8" /> | |
| <meta name="viewport" content="width=device-width, initial-scale=1.0" /> | |
| <title>Hub Semantic Search</title> | |
| <script src="https://cdn.tailwindcss.com"></script> | |
| <script src="https://unpkg.com/lucide@latest"></script> | |
| <script src="https://cdn.jsdelivr.net/npm/lodash@4.17.21/lodash.min.js"></script> | |
| </head> | |
| <body> | |
| <div class="w-full max-w-4xl mx-auto p-4 space-y-8"> | |
| <h1 class="text-3xl font-bold text-gray-800">Hub Semantic Search</h1> | |
| <div | |
| class="bg-gradient-to-br from-blue-50 to-indigo-50 p-6 rounded-xl shadow-sm border border-blue-100 mb-6" | |
| > | |
| <h2 | |
| class="text-lg font-semibold mb-2 text-gray-800 flex items-center gap-2" | |
| > | |
| <i data-lucide="search" class="text-blue-500"></i> | |
| Welcome to Hub Semantic Search | |
| </h2> | |
| <p class="text-gray-700 mb-2 text-sm"> | |
| Find and explore the 🤗 Hub via semantic search on LLM-generated | |
| summaries! | |
| </p> | |
| <div | |
| class="bg-blue-100 text-blue-800 px-3 py-1.5 rounded-md mb-2 text-sm" | |
| > | |
| <p class="flex items-center gap-2"> | |
| <i data-lucide="info"></i> Currently supporting dataset search only. | |
| Model search coming soon! | |
| </p> | |
| </div> | |
| <button | |
| onclick="toggleAccordion()" | |
| id="accordionButton" | |
| class="text-blue-500 hover:text-blue-700 flex items-center gap-2 text-sm" | |
| > | |
| <i | |
| data-lucide="chevron-right" | |
| id="accordionIcon" | |
| class="transition-transform" | |
| ></i> | |
| <span>How it works</span> | |
| </button> | |
| <div id="accordionContent" class="hidden"> | |
| <ul | |
| class="list-disc list-inside space-y-1 text-gray-600 ml-4 mt-2 text-sm" | |
| > | |
| <li> | |
| <strong>AI-Generated Summaries:</strong> Each dataset is indexed | |
| using a concise summary generated by an LLM | |
| </li> | |
| <li> | |
| <strong>Semantic Search:</strong> Find semantically similar | |
| resources based on these summaries | |
| </li> | |
| <li> | |
| <strong>Find Similar:</strong> Discover related resources using | |
| semantic matching | |
| </li> | |
| </ul> | |
| </div> | |
| </div> | |
| <div class="tabs w-full"> | |
| <div class="tab-list flex gap-2 border-b mb-6"> | |
| <button | |
| onclick="switchTab('search')" | |
| id="searchTab" | |
| class="tab-trigger active px-4 sm:px-6 py-3 flex items-center gap-2 border-b-2 border-transparent hover:bg-gray-50 transition-colors flex-1 justify-center" | |
| > | |
| <i data-lucide="search"></i> Search | |
| </button> | |
| <button | |
| onclick="switchTab('similar')" | |
| id="similarTab" | |
| class="tab-trigger px-4 sm:px-6 py-3 flex items-center gap-2 border-b-2 border-transparent hover:bg-gray-50 transition-colors flex-1 justify-center" | |
| > | |
| <i data-lucide="arrow-right"></i> Find Similar | |
| </button> | |
| </div> | |
<!-- Search tab panel: free-text semantic search over dataset summaries. -->
<div id="searchContent" class="tab-content space-y-4">
  <div class="card bg-white p-8 rounded-xl shadow-sm border border-gray-100">
    <!-- Visible hint; also referenced by the input as its description. -->
    <p id="searchHint" class="text-gray-600 mb-4">
      Enter keywords to search through dataset descriptions. The search
      will automatically update as you type.
    </p>
    <div class="relative">
      <!-- A placeholder is not an accessible name, so the field gets a real
           (visually hidden) label. -->
      <label for="searchInput" class="sr-only">Search datasets</label>
      <input
        type="text"
        id="searchInput"
        placeholder="Type to search (minimum 3 characters)..."
        aria-describedby="searchHint"
        class="w-full p-3 border rounded-lg pr-10 focus:ring-2 focus:ring-blue-100 focus:border-blue-300 transition-all outline-none"
      />
      <!-- Spinner toggled by searchDatasets() while a request is in flight. -->
      <div id="searchLoader" class="hidden absolute right-3 top-2">
        <i data-lucide="loader-2" class="animate-spin"></i>
      </div>
    </div>
  </div>
</div>
<!-- "Find Similar" tab panel: look up datasets similar to a given dataset ID. -->
<div id="similarContent" class="hidden tab-content space-y-4">
  <div class="card bg-white p-8 rounded-xl shadow-sm border border-gray-100">
    <!-- Visible hint; also referenced by the input as its description. -->
    <p id="datasetHint" class="text-gray-600 mb-4">
      Enter a dataset ID to find similar datasets. Popular datasets will
      appear as you type.
    </p>
    <div class="flex gap-3">
      <div class="relative w-full">
        <!-- A placeholder is not an accessible name, so the field gets a real
             (visually hidden) label. -->
        <label for="datasetInput" class="sr-only">Dataset ID</label>
        <!-- autocomplete="off": the browser's own history dropdown would
             otherwise sit on top of the custom suggestions box. -->
        <input
          type="text"
          id="datasetInput"
          class="w-full p-3 pr-10 border border-gray-200 rounded-lg"
          placeholder="e.g. openai/gsm8k"
          aria-describedby="datasetHint"
          autocomplete="off"
        />
        <!-- findSimilarDatasets() looks this element up by id and toggles it
             while a request is in flight; it was missing from the markup. -->
        <div id="similarLoader" class="hidden absolute right-3 top-3">
          <i data-lucide="loader-2" class="animate-spin"></i>
        </div>
        <!-- Filled and shown/hidden by displaySuggestions(). -->
        <div
          id="suggestionsBox"
          class="hidden absolute w-full mt-1 bg-white border border-gray-200 rounded-lg shadow-lg z-10 max-h-60 overflow-y-auto"
        ></div>
      </div>
      <!-- `btn-primary` is kept as a hook, but no rule for it is visible in
           this file, so the actual styling comes from the utility classes. -->
      <button
        type="button"
        onclick="findSimilarDatasets()"
        class="btn-primary px-6 py-3 bg-blue-500 hover:bg-blue-600 text-white rounded-lg transition-colors whitespace-nowrap"
      >
        Find Similar
      </button>
    </div>
  </div>
</div>
<!-- Error banner: showError() writes the message and removes `hidden`.
     role="alert" lets assistive technology announce it when it appears. -->
<div
  id="errorMessage"
  role="alert"
  class="hidden mt-4 p-4 text-red-600 bg-red-50 rounded-md"
></div>
| <div id="resultsContainer" class="mt-6 space-y-4"></div> | |
| </div> | |
| </div> | |
<style>
  /* Highlight for the selected tab button; switchTab() adds and removes
     the `active` class. */
  .tab-trigger.active {
    color: #3b82f6;
    border-bottom-color: #3b82f6;
  }
</style>
| <script> | |
// ---- Configuration ----

// Base URL of the search backend (written without a trailing slash).
const API_URL = "https://davanstrien-huggingface-datasets-search-v2.hf.space";

// Queries shorter than this clear the results instead of searching.
const MIN_SEARCH_LENGTH = 3;

// Wait time handed to _.debounce for the live search.
const DEBOUNCE_MS = 300;

// Each page adds this many results to the `k` requested from the API.
const RESULTS_PER_PAGE = 5;

// Upper bound displayResults() uses when deciding whether to offer more.
const MAX_RESULTS = 100;

// Page currently shown; shared by both tabs and advanced by loadMore().
let currentPage = 1;

// Initialize Lucide icons
lucide.createIcons();
/**
 * Show one tab's panel and mark its trigger button as active.
 *
 * @param {string} tabId - Tab key; the elements with ids `${tabId}Content`
 *   and `${tabId}Tab` are the panel and the button respectively.
 */
function switchTab(tabId) {
  // Changing tabs always restarts pagination.
  currentPage = 1;

  // Hide every panel and clear every highlight before revealing the target.
  for (const panel of document.querySelectorAll(".tab-content")) {
    panel.classList.add("hidden");
  }
  for (const tabButton of document.querySelectorAll(".tab-trigger")) {
    tabButton.classList.remove("active");
  }

  const targetPanel = document.getElementById(`${tabId}Content`);
  const targetButton = document.getElementById(`${tabId}Tab`);
  targetPanel.classList.remove("hidden");
  targetButton.classList.add("active");
}
/**
 * Build the HTML markup for one result card.
 *
 * Every value read from `result` is escaped for the context it is placed in
 * (HTML text, attribute value, inline-handler argument, URL path), because the
 * data comes from a remote API and includes LLM-generated text.
 *
 * Side effect: starts checkDatasetValidity() for the dataset. That check is
 * asynchronous, so it can only continue after the caller's synchronous code
 * (which inserts the returned markup) has finished; the preview section it
 * reveals is rendered hidden here.
 *
 * @param {Object} result - One entry of the API's `results` array. Fields read:
 *   `dataset_id`, `likes`, `downloads`, `similarity`, `summary`.
 * @returns {string} HTML for the card.
 */
function createResultCard(result) {
  const HTML_ESCAPES = {
    "&": "&amp;",
    "<": "&lt;",
    ">": "&gt;",
    '"': "&quot;",
    "'": "&#39;",
  };
  // Escape a value for HTML text content or a double-quoted attribute value.
  const escapeHtml = (value) =>
    (value == null ? "" : String(value)).replace(
      /[&<>"']/g,
      (ch) => HTML_ESCAPES[ch]
    );

  const datasetId = String(result.dataset_id);
  // For text nodes and id="..." attributes. The browser decodes the entities,
  // so getElementById(`preview-section-${datasetId}`) still matches.
  const idHtml = escapeHtml(datasetId);
  // For inline handlers: a JSON string literal is a valid JS string literal;
  // it is then HTML-escaped because it lives inside an attribute.
  const idJsArg = escapeHtml(JSON.stringify(datasetId));
  // For URLs: encode each path segment but keep the "owner/name" slash.
  const idUrlPath = escapeHtml(
    datasetId.split("/").map(encodeURIComponent).join("/")
  );

  // Only show the match badge when the similarity is a usable number.
  const similarity = Number(result.similarity);
  const matchBadge = Number.isFinite(similarity)
    ? `<span class="bg-blue-50 px-2 py-1 rounded text-sm">
            ${(similarity * 100).toFixed(1)}% match
          </span>`
    : "";

  const cardHtml = `
    <div class="card bg-white p-4 sm:p-6 rounded-lg shadow hover:shadow-md transition-shadow">
      <div class="space-y-2 w-full">
        <div class="flex flex-col sm:flex-row sm:items-center justify-between gap-2">
          <div class="flex items-center gap-2">
            <i data-lucide="database" class="text-blue-500"></i>
            <h3 class="text-lg font-semibold">${idHtml}</h3>
          </div>
          <div class="flex flex-wrap items-center gap-2">
            <div class="flex items-center gap-4 text-sm text-gray-500">
              <span class="flex items-center gap-1">
                <i data-lucide="heart" class="w-4 h-4"></i>
                ${escapeHtml(result.likes)}
              </span>
              <span class="flex items-center gap-1">
                <i data-lucide="download" class="w-4 h-4"></i>
                ${escapeHtml(result.downloads)}
              </span>
            </div>
            ${matchBadge}
            <button
              type="button"
              onclick="findSimilarFromResult(${idJsArg})"
              class="flex items-center gap-1 text-sm text-blue-500 hover:text-blue-700"
            >
              <i data-lucide="arrow-right"></i>
              Find Similar
            </button>
          </div>
        </div>
        <p class="text-sm text-gray-600">${escapeHtml(result.summary)}</p>
        <!-- Preview section starts hidden; checkDatasetValidity() reveals it -->
        <div id="preview-section-${idHtml}" class="mt-4 border-t pt-4 hidden">
          <button
            type="button"
            onclick="togglePreview(${idJsArg})"
            class="flex items-center gap-2 text-sm text-gray-600 hover:text-gray-800"
          >
            <i data-lucide="chevron-right" id="preview-icon-${idHtml}" class="transition-transform"></i>
            Preview Dataset
          </button>
          <div id="preview-content-${idHtml}" class="hidden mt-4">
            <iframe
              src="https://huggingface.co/datasets/${idUrlPath}/embed/viewer/default/train"
              title="Dataset viewer preview for ${idHtml}"
              loading="lazy"
              frameborder="0"
              width="100%"
              height="560px"
            ></iframe>
          </div>
        </div>
        <a href="https://huggingface.co/datasets/${idUrlPath}"
          target="_blank"
          rel="noopener noreferrer"
          class="inline-flex items-center gap-1 text-sm text-blue-500 hover:text-blue-700 mt-2">
          <i data-lucide="external-link" class="w-4 h-4"></i>
          View on Hugging Face Hub
        </a>
      </div>
    </div>
  `;

  // Start the viewer-availability check now; it tolerates the card not being
  // in the DOM (it null-checks the preview section before touching it).
  checkDatasetValidity(result.dataset_id);
  return cardHtml;
}
/**
 * Ask the datasets server whether a dataset has a viewer and, if so, reveal
 * the card's preview section (`#preview-section-<datasetId>`).
 *
 * Failures are best-effort: network or parsing errors are logged and the
 * preview simply stays hidden.
 *
 * @param {string} datasetId - Dataset identifier, e.g. "openai/gsm8k".
 * @returns {Promise<void>}
 */
async function checkDatasetValidity(datasetId) {
  try {
    // The id goes into a query string, so it must be percent-encoded.
    const response = await fetch(
      `https://datasets-server.huggingface.co/is-valid?dataset=${encodeURIComponent(
        datasetId
      )}`
    );
    // A non-2xx answer means there is nothing to preview; do not try to
    // interpret an error body as a validity report.
    if (!response.ok) return;

    const data = await response.json();
    // Show preview section only if viewer is available
    if (!data || !data.viewer) return;

    const previewSection = document.getElementById(
      `preview-section-${datasetId}`
    );
    // The card may have been replaced by a newer render in the meantime.
    if (previewSection) {
      previewSection.classList.remove("hidden");
    }
  } catch (error) {
    console.error(
      `Failed to check validity for dataset ${datasetId}:`,
      error
    );
  }
}
// Id of the most recently started search; responses carrying an older id are
// stale and must not touch the UI.
let latestSearchRequestId = 0;

/**
 * Debounced semantic search over datasets.
 *
 * Requests `RESULTS_PER_PAGE * page` results and renders them through
 * displayResults(). Queries shorter than MIN_SEARCH_LENGTH (after trimming)
 * clear the results instead.
 *
 * @param {string} query - Text typed by the user.
 * @param {number} [page=1] - 1-based page; each page adds RESULTS_PER_PAGE results.
 */
const searchDatasets = _.debounce(async (query, page = 1) => {
  const trimmedQuery = String(query || "").trim();
  const requestId = ++latestSearchRequestId;
  const isLatest = () => requestId === latestSearchRequestId;

  // Keep the shared page counter in step with what is displayed, so a fresh
  // query followed by "Load More" continues from page 2 rather than from
  // wherever the previous query left off.
  currentPage = page;

  const loader = document.getElementById("searchLoader");
  const errorBox = document.getElementById("errorMessage");

  if (trimmedQuery.length < MIN_SEARCH_LENGTH) {
    document.getElementById("resultsContainer").innerHTML = "";
    // Any in-flight request was just invalidated above, so its `finally`
    // will not hide the loader; do it here.
    loader.classList.add("hidden");
    errorBox.classList.add("hidden");
    return;
  }

  loader.classList.remove("hidden");
  errorBox.classList.add("hidden");
  try {
    const response = await fetch(
      `${API_URL}/search/datasets?query=${encodeURIComponent(
        trimmedQuery
      )}&k=${RESULTS_PER_PAGE * page}`
    );
    if (!response.ok) throw new Error("Search failed");
    const data = await response.json();
    // A newer search started while this one was in flight: drop the result.
    if (!isLatest()) return;
    displayResults(data.results, page);
  } catch (error) {
    console.error("Search error:", error);
    if (isLatest()) {
      showError("Failed to perform search. Please try again.");
    }
  } finally {
    // Only the newest request owns the loader.
    if (isLatest()) {
      loader.classList.add("hidden");
    }
  }
}, DEBOUNCE_MS);
// Cache for trending datasets
let trendingDatasetsCache = null;
let cacheTimestamp = null;
const CACHE_DURATION = 1000 * 60 * 15; // 15 minutes

/**
 * Return up to 20 dataset ids from the Hub's dataset listing, for use as
 * suggestions. Results are cached in memory for CACHE_DURATION.
 *
 * Never rejects: on any failure the error is logged and an empty array is
 * returned (nothing is cached in that case, so the next call retries).
 *
 * @returns {Promise<string[]>} Dataset ids in the order the API returned them.
 */
async function fetchTrendingDatasets() {
  const cacheIsFresh =
    trendingDatasetsCache &&
    cacheTimestamp &&
    Date.now() - cacheTimestamp < CACHE_DURATION;
  if (cacheIsFresh) {
    return trendingDatasetsCache;
  }

  const SUGGESTION_COUNT = 20;
  try {
    // Only the first entries are used, so ask the API for just those.
    const response = await fetch(
      `https://huggingface.co/api/datasets?limit=${SUGGESTION_COUNT}`
    );
    if (!response.ok) {
      throw new Error(`Dataset listing failed with status ${response.status}`);
    }
    const data = await response.json();
    if (!Array.isArray(data)) {
      throw new Error("Unexpected dataset listing payload");
    }

    // slice() is kept as a guard in case the server returns more than asked.
    const trendingDatasets = data
      .slice(0, SUGGESTION_COUNT)
      .map((dataset) => dataset.id);

    trendingDatasetsCache = trendingDatasets;
    cacheTimestamp = Date.now();
    return trendingDatasets;
  } catch (error) {
    console.error("Error fetching trending datasets:", error);
    return [];
  }
}
/**
 * Render dataset-id suggestions into the dropdown, or hide the dropdown when
 * there is nothing to show.
 *
 * Ids are escaped before being placed in the markup: they come from a remote
 * API and end up both as visible text and inside an inline click handler.
 *
 * @param {string[]} datasets - Dataset ids to offer.
 * @param {HTMLElement} suggestionsBox - Dropdown container element.
 */
function displaySuggestions(datasets, suggestionsBox) {
  if (!Array.isArray(datasets) || datasets.length === 0) {
    suggestionsBox.classList.add("hidden");
    return;
  }

  const HTML_ESCAPES = {
    "&": "&amp;",
    "<": "&lt;",
    ">": "&gt;",
    '"': "&quot;",
    "'": "&#39;",
  };
  // Escape a value for HTML text content or a double-quoted attribute value.
  const escapeHtml = (value) =>
    String(value).replace(/[&<>"']/g, (ch) => HTML_ESCAPES[ch]);

  suggestionsBox.innerHTML = datasets
    .map((datasetId) => {
      const id = String(datasetId);
      // JSON.stringify yields a valid JS string literal for the handler
      // argument; it is HTML-escaped because it sits inside an attribute.
      const idJsArg = escapeHtml(JSON.stringify(id));
      return `
        <div
          class="p-3 hover:bg-gray-50 cursor-pointer border-b last:border-b-0"
          onclick="selectSuggestion(${idJsArg})"
        >
          <div class="flex items-center gap-2">
            <i data-lucide="database" class="w-4 h-4 text-blue-500"></i>
            <span>${escapeHtml(id)}</span>
          </div>
        </div>
      `;
    })
    .join("");
  suggestionsBox.classList.remove("hidden");
  // Newly inserted <i data-lucide> placeholders need another icon pass.
  lucide.createIcons();
}
/**
 * Click handler for a suggestion entry: put the chosen id into the dataset
 * field, close the dropdown, and run the similarity search.
 *
 * @param {string} dataset - Dataset id that was picked.
 */
function selectSuggestion(dataset) {
  const idField = document.getElementById("datasetInput");
  const dropdown = document.getElementById("suggestionsBox");

  idField.value = dataset;
  dropdown.classList.add("hidden");

  findSimilarDatasets();
}
/**
 * Look up datasets similar to the id currently in `#datasetInput` and render
 * them through displayResults().
 *
 * Does nothing when the field is empty or whitespace. On failure the error is
 * logged and a message is shown via showError().
 *
 * @param {number} [page=1] - 1-based page; each page adds RESULTS_PER_PAGE results.
 * @returns {Promise<void>}
 */
async function findSimilarDatasets(page = 1) {
  // Surrounding whitespace (e.g. from a paste) is never part of an id.
  const datasetId = document.getElementById("datasetInput").value.trim();
  if (!datasetId) return;

  // Guard against a non-numeric argument, for instance an Event object if
  // this function is ever attached directly as a listener.
  const pageNumber = Number.isInteger(page) && page > 0 ? page : 1;
  // Keep the shared page counter in step with what is displayed, so a new
  // lookup followed by "Load More" continues from page 2.
  currentPage = pageNumber;

  // The loader element is optional in the markup, hence the null checks.
  const similarLoader = document.getElementById("similarLoader");
  if (similarLoader) {
    similarLoader.classList.remove("hidden");
  }
  document.getElementById("errorMessage").classList.add("hidden");

  try {
    const response = await fetch(
      `${API_URL}/similarity/datasets?dataset_id=${encodeURIComponent(
        datasetId
      )}&k=${RESULTS_PER_PAGE * pageNumber}`
    );
    if (!response.ok) throw new Error("Similarity search failed");
    const data = await response.json();
    displayResults(data.results, pageNumber);
  } catch (error) {
    // Log the cause, matching what searchDatasets does for its failures.
    console.error("Similarity search error:", error);
    showError("Failed to find similar datasets. Please try again.");
  } finally {
    if (similarLoader) {
      similarLoader.classList.add("hidden");
    }
  }
}
/**
 * Replace the contents of `#resultsContainer` with cards for `results`, plus
 * either a "Load More" button or an end-of-list notice where applicable.
 *
 * @param {Object[]} results - Result objects as accepted by createResultCard().
 * @param {number} [page=1] - Page the results belong to; drives the footer.
 */
function displayResults(results, page = 1) {
  const container = document.getElementById("resultsContainer");
  console.log("Displaying results:", results);

  // Nothing to show: render the placeholder and stop.
  if (!(results && results.length > 0)) {
    container.innerHTML = `
      <div class="text-center text-gray-500">
        No results found
      </div>
    `;
    return;
  }

  const cardsHtml = results.map((result) => createResultCard(result)).join("");

  // "Load More" is offered while the last request came back full and the
  // next page would still fit under MAX_RESULTS.
  const canLoadMore =
    results.length >= RESULTS_PER_PAGE * page &&
    RESULTS_PER_PAGE * (page + 1) <= MAX_RESULTS;

  let footerHtml = "";
  if (canLoadMore) {
    footerHtml = `<button
          onclick="loadMore()"
          class="w-full mt-4 px-6 py-3 bg-gray-100 hover:bg-gray-200 text-gray-700 rounded-lg transition-colors flex items-center gap-2 justify-center"
        >
          <i data-lucide="more-horizontal"></i>
          Load More Results
        </button>`;
  } else if (results.length >= MAX_RESULTS) {
    footerHtml = `<div class="text-center mt-4 p-6 bg-blue-50 rounded-lg">
          <p class="text-gray-700 mb-3">You've reached the end of our dataset journey! (${MAX_RESULTS} results)</p>
          <p class="text-gray-600 mb-4">Can't find what you're looking for? Why not create and share your own dataset?</p>
          <a href="https://huggingface.co/docs/datasets/upload_dataset"
            target="_blank"
            class="inline-flex items-center gap-2 text-blue-500 hover:text-blue-700">
            <i data-lucide="external-link"></i>
            Learn how to share your dataset on Hugging Face
          </a>
        </div>`;
  }

  container.innerHTML = `
    <div class="flex justify-between items-center">
      <h2 class="text-lg font-semibold">Results</h2>
      <span class="text-sm text-gray-500">Found ${results.length} results</span>
    </div>
    ${cardsHtml}
    ${footerHtml}
  `;
  // The cards contain fresh <i data-lucide> placeholders.
  lucide.createIcons();
}
/**
 * Put `message` into the `#errorMessage` banner and make the banner visible.
 *
 * @param {string} message - Text to display; set as plain text, not HTML.
 */
function showError(message) {
  const banner = document.getElementById("errorMessage");
  banner.textContent = message;
  banner.classList.remove("hidden");
}
// Event listeners

// Live search: every edit of the search field schedules a debounced search.
document
  .getElementById("searchInput")
  .addEventListener("input", (e) => searchDatasets(e.target.value));

// Keyboard handling for the dataset-id field:
//   Enter  - close the suggestions dropdown and run the similarity search
//   Escape - just close the suggestions dropdown
// Without closing it, the dropdown stays on top of the results after Enter.
document
  .getElementById("datasetInput")
  .addEventListener("keydown", (e) => {
    if (e.key !== "Enter" && e.key !== "Escape") return;

    const suggestionsBox = document.getElementById("suggestionsBox");
    if (suggestionsBox) {
      suggestionsBox.classList.add("hidden");
    }
    if (e.key === "Enter") findSimilarDatasets();
  });
/**
 * Handler for a result card's "Find Similar" button: jump to the similar tab,
 * prefill the dataset field with the card's id, and run the lookup.
 *
 * @param {string} datasetId - Id of the dataset whose neighbours are wanted.
 */
function findSimilarFromResult(datasetId) {
  switchTab("similar");

  // Assigning `.value` fills the field without focusing it, so the focus
  // listener that opens the suggestions does not run.
  document.getElementById("datasetInput").value = datasetId;

  // Make sure a dropdown left open earlier does not cover the results.
  document.getElementById("suggestionsBox").classList.add("hidden");

  findSimilarDatasets();
}
/**
 * Expand or collapse the "How it works" panel and rotate its chevron to match.
 */
function toggleAccordion() {
  const panel = document.getElementById("accordionContent");
  const chevron = document.getElementById("accordionIcon");

  // classList.toggle() reports whether the class is present afterwards,
  // i.e. whether the panel is now collapsed.
  const collapsed = panel.classList.toggle("hidden");
  chevron.style.transform = `rotate(${collapsed ? 0 : 90}deg)`;
}
/**
 * Handler for the "Load More Results" button: advance the shared page counter
 * and re-run whichever query belongs to the active tab.
 */
function loadMore() {
  currentPage += 1;

  const activeTabId = document.querySelector(".tab-trigger.active").id;

  // Anything other than the search tab is treated as the similar tab.
  if (activeTabId !== "searchTab") {
    findSimilarDatasets(currentPage);
    return;
  }

  searchDatasets(document.getElementById("searchInput").value, currentPage);
}
/**
 * Show or hide a card's embedded dataset preview and rotate its chevron.
 *
 * @param {string} datasetId - Id used in the card's `preview-content-…` and
 *   `preview-icon-…` element ids.
 */
function togglePreview(datasetId) {
  const previewBody = document.getElementById(`preview-content-${datasetId}`);
  const chevron = document.getElementById(`preview-icon-${datasetId}`);

  // classList.toggle() reports whether the class is present afterwards,
  // i.e. whether the preview is now hidden.
  const nowHidden = previewBody.classList.toggle("hidden");
  chevron.style.transform = nowHidden ? "rotate(0deg)" : "rotate(90deg)";
}
// Suggestions dropdown wiring for the dataset-id field.
document.addEventListener("DOMContentLoaded", () => {
  const datasetInput = document.getElementById("datasetInput");
  const suggestionsBox = document.getElementById("suggestionsBox");

  /**
   * Fetch the suggestion list and show the entries containing `filterText`
   * (case-insensitive). An empty filter shows the whole list.
   */
  const showSuggestions = async (filterText) => {
    const trending = await fetchTrendingDatasets();

    // The fetch is asynchronous: if focus has moved elsewhere by now, opening
    // the dropdown would put it on top of whatever the user is doing instead.
    if (document.activeElement !== datasetInput) return;

    const needle = filterText.toLowerCase();
    const matches = trending.filter((dataset) =>
      String(dataset).toLowerCase().includes(needle)
    );
    displaySuggestions(matches, suggestionsBox);
  };

  // While typing: narrow the list to ids containing the typed text.
  datasetInput.addEventListener("input", (e) => showSuggestions(e.target.value));

  // On focus: show the full list, whatever the field currently contains.
  datasetInput.addEventListener("focus", () => showSuggestions(""));

  // Close the dropdown when the user clicks anywhere outside the field and the
  // dropdown. A click is used rather than blur: hiding on blur would remove a
  // suggestion before its own click handler could run.
  document.addEventListener("click", (e) => {
    if (e.target === datasetInput || suggestionsBox.contains(e.target)) return;
    suggestionsBox.classList.add("hidden");
  });
});
| </script> | |
| </body> | |
| </html> | |