Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -244,7 +244,6 @@ html = """
|
|
| 244 |
</style>
|
| 245 |
</head>
|
| 246 |
<body class="bg-white dark:bg-gray-900 text-gray-800 dark:text-gray-200 min-h-screen">
|
| 247 |
-
<!-- Dark mode detection -->
|
| 248 |
<script>
|
| 249 |
if (window.matchMedia && window.matchMedia('(prefers-color-scheme: dark)').matches) {
|
| 250 |
document.documentElement.classList.add('dark');
|
|
@@ -259,15 +258,12 @@ html = """
|
|
| 259 |
</script>
|
| 260 |
|
| 261 |
<div class="container mx-auto px-4 py-8">
|
| 262 |
-
<!-- Header -->
|
| 263 |
<header class="text-center mb-8">
|
| 264 |
<h1 class="text-3xl font-bold mb-2">🤗 Infinite Dataset Hub ♾️</h1>
|
| 265 |
<p class="text-lg text-gray-600 dark:text-gray-400">Generate datasets from AI and real-world data sources</p>
|
| 266 |
</header>
|
| 267 |
|
| 268 |
-
<!-- Main Content -->
|
| 269 |
<main>
|
| 270 |
-
<!-- Search Section -->
|
| 271 |
<div id="search-page" class="mb-8">
|
| 272 |
<div class="max-w-3xl mx-auto">
|
| 273 |
<div class="mb-4">
|
|
@@ -300,7 +296,6 @@ html = """
|
|
| 300 |
</div>
|
| 301 |
</div>
|
| 302 |
|
| 303 |
-
<!-- Search Engine Selection Modal -->
|
| 304 |
<div id="engine-modal" class="fixed inset-0 bg-black bg-opacity-50 flex items-center justify-center z-50 hidden">
|
| 305 |
<div class="bg-white dark:bg-gray-800 rounded-lg p-6 max-w-lg w-full max-h-[80vh] overflow-y-auto">
|
| 306 |
<div class="flex justify-between items-center mb-4">
|
|
@@ -317,8 +312,7 @@ html = """
|
|
| 317 |
</p>
|
| 318 |
|
| 319 |
<div id="engine-options" class="space-y-2 mb-6">
|
| 320 |
-
<
|
| 321 |
-
</div>
|
| 322 |
|
| 323 |
<div class="flex justify-between">
|
| 324 |
<button id="select-all-engines" class="text-primary hover:underline">Select All</button>
|
|
@@ -334,8 +328,7 @@ html = """
|
|
| 334 |
</div>
|
| 335 |
|
| 336 |
<div id="dataset-results" class="grid grid-cols-1 md:grid-cols-2 gap-4 mt-6">
|
| 337 |
-
<
|
| 338 |
-
</div>
|
| 339 |
|
| 340 |
<div id="load-more-container" class="text-center mt-6 hidden">
|
| 341 |
<button id="load-more-button" class="bg-gray-200 dark:bg-gray-700 px-6 py-3 rounded-lg hover:bg-gray-300 dark:hover:bg-gray-600 transition">
|
|
@@ -345,7 +338,6 @@ html = """
|
|
| 345 |
</div>
|
| 346 |
</div>
|
| 347 |
|
| 348 |
-
<!-- Dataset Detail Page -->
|
| 349 |
<div id="dataset-page" class="hidden max-w-4xl mx-auto">
|
| 350 |
<button id="back-button" class="flex items-center text-primary mb-4 hover:underline">
|
| 351 |
<svg xmlns="http://www.w3.org/2000/svg" class="h-5 w-5 mr-1" viewBox="0 0 20 20" fill="currentColor">
|
|
@@ -423,7 +415,6 @@ html = """
|
|
| 423 |
</div>
|
| 424 |
</main>
|
| 425 |
|
| 426 |
-
<!-- Footer -->
|
| 427 |
<footer class="mt-12 text-center text-sm text-gray-600 dark:text-gray-400">
|
| 428 |
<p>Powered by Claude-3.7-Sonnet • Datasets generated from real sources and AI</p>
|
| 429 |
</footer>
|
|
@@ -485,115 +476,943 @@ html = """
|
|
| 485 |
const selectAllEngines = document.getElementById('select-all-engines');
|
| 486 |
const deselectAllEngines = document.getElementById('deselect-all-engines');
|
| 487 |
|
| 488 |
-
// Event Listeners
|
| 489 |
-
document.addEventListener('DOMContentLoaded', () => {
|
| 490 |
-
searchButton.addEventListener('click', performSearch);
|
| 491 |
-
searchInput.addEventListener('keypress', (e) => {
|
| 492 |
-
if (e.key === 'Enter') performSearch();
|
| 493 |
-
});
|
| 494 |
-
loadMoreButton.addEventListener('click', loadMoreDatasets);
|
| 495 |
-
backButton.addEventListener('click', showSearchPage);
|
| 496 |
-
generateFullButton.addEventListener('click', generateFullDataset);
|
| 497 |
-
downloadCsvButton.addEventListener('click', () => downloadData('csv'));
|
| 498 |
-
downloadJsonButton.addEventListener('click', () => downloadData('json'));
|
| 499 |
-
downloadParquetButton.addEventListener('click', () => downloadData('parquet'));
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 500 |
|
| 501 |
-
|
| 502 |
-
|
| 503 |
-
closeModalButton.addEventListener('click', hideEngineModal);
|
| 504 |
-
saveEnginesButton.addEventListener('click', saveEngineSettings);
|
| 505 |
-
selectAllEngines.addEventListener('click', () => toggleAllEngines(true));
|
| 506 |
-
deselectAllEngines.addEventListener('click', () => toggleAllEngines(false));
|
| 507 |
|
| 508 |
-
//
|
| 509 |
-
|
| 510 |
|
| 511 |
-
//
|
| 512 |
-
|
| 513 |
-
});
|
| 514 |
-
|
| 515 |
-
// Search Engine Settings
|
| 516 |
-
function populateEngineOptions() {
|
| 517 |
-
engineOptions.innerHTML = '';
|
| 518 |
|
| 519 |
-
|
| 520 |
-
|
| 521 |
-
|
| 522 |
-
|
| 523 |
-
|
| 524 |
-
|
| 525 |
-
optionDiv.innerHTML = `
|
| 526 |
-
<input type="checkbox" id="engine-${engine}" class="engine-checkbox mr-2 h-4 w-4"
|
| 527 |
-
value="${engine}" ${isChecked ? 'checked' : ''}>
|
| 528 |
-
<label for="engine-${engine}" class="cursor-pointer">${engine}</label>
|
| 529 |
-
`;
|
| 530 |
-
|
| 531 |
-
engineOptions.appendChild(optionDiv);
|
| 532 |
-
});
|
| 533 |
}
|
| 534 |
|
| 535 |
-
function
|
| 536 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 537 |
}
|
| 538 |
|
| 539 |
-
function
|
| 540 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 541 |
}
|
| 542 |
|
| 543 |
-
function
|
| 544 |
-
|
| 545 |
-
|
| 546 |
|
| 547 |
-
|
| 548 |
-
|
| 549 |
-
|
| 550 |
-
|
| 551 |
-
showNotification("At least one search engine must be selected. Using DuckDuckGo as default.");
|
| 552 |
-
}
|
| 553 |
|
| 554 |
-
|
| 555 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 556 |
}
|
| 557 |
|
| 558 |
-
function
|
| 559 |
-
|
| 560 |
-
|
| 561 |
-
cb.checked = select;
|
| 562 |
-
});
|
| 563 |
}
|
| 564 |
|
| 565 |
-
|
| 566 |
-
|
| 567 |
-
|
| 568 |
-
dataSourceText.textContent = useRealData ? "Using: Real + AI Data" : "Using: AI Data Only";
|
| 569 |
-
|
| 570 |
-
// Show or hide engine settings button
|
| 571 |
-
engineSettingsButton.style.display = useRealData ? "flex" : "none";
|
| 572 |
-
|
| 573 |
-
showNotification(`Switched to ${useRealData ? "combined real and synthetic" : "synthetic-only"} data mode`);
|
| 574 |
}
|
| 575 |
|
| 576 |
-
|
| 577 |
-
|
| 578 |
-
|
| 579 |
-
|
| 580 |
-
|
| 581 |
-
|
| 582 |
-
|
| 583 |
-
|
| 584 |
|
| 585 |
-
|
| 586 |
-
showLoadingSkeletons();
|
| 587 |
|
| 588 |
-
|
| 589 |
-
|
| 590 |
-
|
| 591 |
-
|
| 592 |
-
|
| 593 |
-
|
| 594 |
-
}
|
| 595 |
}
|
| 596 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 597 |
function searchWithRealData(query) {
|
| 598 |
// Randomly select a search engine from the user's selected engines
|
| 599 |
currentEngine = selectedEngines[Math.floor(Math.random() * selectedEngines.length)];
|
|
@@ -706,65 +1525,6 @@ html = """
|
|
| 706 |
}
|
| 707 |
}
|
| 708 |
|
| 709 |
-
function parseDatasetResults(content) {
|
| 710 |
-
const lines = content.split('\n');
|
| 711 |
-
const datasets = [];
|
| 712 |
-
|
| 713 |
-
lines.forEach(line => {
|
| 714 |
-
// Match lines that start with a number followed by a period
|
| 715 |
-
const match = line.match(/^\s*\d+\.\s+(.+?)\s+\((.+?)\)/);
|
| 716 |
-
if (match) {
|
| 717 |
-
const name = match[1].trim();
|
| 718 |
-
const tags = match[2].split(',').map(tag => tag.trim());
|
| 719 |
-
datasets.push({ name, tags });
|
| 720 |
-
}
|
| 721 |
-
});
|
| 722 |
-
|
| 723 |
-
return datasets;
|
| 724 |
-
}
|
| 725 |
-
|
| 726 |
-
function displayDatasets(datasets) {
|
| 727 |
-
datasets.forEach(dataset => {
|
| 728 |
-
const card = document.createElement('div');
|
| 729 |
-
card.className = 'dataset-card bg-white dark:bg-gray-800 rounded-lg p-4 border border-gray-200 dark:border-gray-700 cursor-pointer relative';
|
| 730 |
-
|
| 731 |
-
const tagsHtml = dataset.tags.map(tag =>
|
| 732 |
-
`<span class="inline-block bg-gray-100 dark:bg-gray-700 text-gray-800 dark:text-gray-300 text-xs px-2 py-1 rounded mr-1 mb-1">${tag}</span>`
|
| 733 |
-
).join('');
|
| 734 |
-
|
| 735 |
-
// Add a badge for real data
|
| 736 |
-
let badgeHtml = '';
|
| 737 |
-
if (dataset.isReal) {
|
| 738 |
-
badgeHtml = `<span class="engine-badge" title="Data from ${dataset.engine}">${dataset.engine.split('.')[0]}</span>`;
|
| 739 |
-
}
|
| 740 |
-
|
| 741 |
-
card.innerHTML = `
|
| 742 |
-
${badgeHtml}
|
| 743 |
-
<h3 class="text-lg font-semibold mb-2">${dataset.name}</h3>
|
| 744 |
-
<div class="flex flex-wrap mt-2">${tagsHtml}</div>
|
| 745 |
-
`;
|
| 746 |
-
|
| 747 |
-
card.addEventListener('click', () => showDatasetDetails(dataset));
|
| 748 |
-
resultsContainer.appendChild(card);
|
| 749 |
-
});
|
| 750 |
-
}
|
| 751 |
-
|
| 752 |
-
function showLoadingSkeletons() {
|
| 753 |
-
for (let i = 0; i < 4; i++) {
|
| 754 |
-
const skeleton = document.createElement('div');
|
| 755 |
-
skeleton.className = 'bg-white dark:bg-gray-800 rounded-lg p-4 border border-gray-200 dark:border-gray-700';
|
| 756 |
-
skeleton.innerHTML = `
|
| 757 |
-
<div class="shimmer h-6 w-3/4 mb-2"></div>
|
| 758 |
-
<div class="flex flex-wrap mt-2">
|
| 759 |
-
<div class="shimmer h-6 w-16 rounded mr-1 mb-1"></div>
|
| 760 |
-
<div class="shimmer h-6 w-20 rounded mr-1 mb-1"></div>
|
| 761 |
-
<div class="shimmer h-6 w-24 rounded mr-1 mb-1"></div>
|
| 762 |
-
</div>
|
| 763 |
-
`;
|
| 764 |
-
resultsContainer.appendChild(skeleton);
|
| 765 |
-
}
|
| 766 |
-
}
|
| 767 |
-
|
| 768 |
function loadMoreDatasets() {
|
| 769 |
currentPage++;
|
| 770 |
|
|
@@ -1230,6 +1990,8 @@ html = """
|
|
| 1230 |
);
|
| 1231 |
} catch (err) {
|
| 1232 |
showError("Error sending message: " + err);
|
|
|
|
|
|
|
| 1233 |
}
|
| 1234 |
};
|
| 1235 |
|
|
@@ -2041,13 +2803,17 @@ with gr.Blocks(css=css) as demo:
|
|
| 2041 |
engine_settings_button = gr.Button("Configure Search Engines", icon="https://img.icons8.com/ios-filled/50/000000/settings--v1.png", size="sm")
|
| 2042 |
|
| 2043 |
# Engine Selection Modal
|
| 2044 |
-
with gr.
|
| 2045 |
-
gr.
|
| 2046 |
-
|
| 2047 |
-
|
| 2048 |
-
|
| 2049 |
-
|
| 2050 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2051 |
|
| 2052 |
# --- Dataset Detail Page UI ---
|
| 2053 |
with gr.Column(visible=False, elem_id="dataset-page") as dataset_page:
|
|
@@ -2094,7 +2860,7 @@ with gr.Blocks(css=css) as demo:
|
|
| 2094 |
if "I'm sorry" in line or "policy" in line: raise gr.Error("Inappropriate content detected.")
|
| 2095 |
if generated_count >= MAX_NB_ITEMS_PER_GENERATION_CALL: break
|
| 2096 |
|
| 2097 |
-
match = re.match(r"^\s*\d+\.\s+(.+?)\s+
|
| 2098 |
if match:
|
| 2099 |
dataset_name, tags = match.groups()
|
| 2100 |
dataset_name, tags = dataset_name.strip(), tags.strip()
|
|
@@ -2359,9 +3125,9 @@ with gr.Blocks(css=css) as demo:
|
|
| 2359 |
outputs=[selected_engines_state, current_engine_state, gr.Info()]
|
| 2360 |
)
|
| 2361 |
|
| 2362 |
-
engine_settings_button.click(lambda:
|
| 2363 |
-
|
| 2364 |
-
|
| 2365 |
# Initial App Load Logic
|
| 2366 |
@demo.load(outputs=([search_page, dataset_page, dataset_title_md, dataset_description_md, dataset_source_badge, dataset_source_info, dataset_share_textbox, full_dataset_section, save_dataset_button, open_dataset_message, search_bar] + # Outputs for detail page and search bar
|
| 2367 |
buttons + [generated_texts_state] + # Outputs for search results buttons and state
|
|
@@ -2424,4 +3190,4 @@ with gr.Blocks(css=css) as demo:
|
|
| 2424 |
|
| 2425 |
|
| 2426 |
if __name__ == "__main__":
|
| 2427 |
-
demo.launch(share=False, server_name="0.0.0.0")
|
|
|
|
| 244 |
</style>
|
| 245 |
</head>
|
| 246 |
<body class="bg-white dark:bg-gray-900 text-gray-800 dark:text-gray-200 min-h-screen">
|
|
|
|
| 247 |
<script>
|
| 248 |
if (window.matchMedia && window.matchMedia('(prefers-color-scheme: dark)').matches) {
|
| 249 |
document.documentElement.classList.add('dark');
|
|
|
|
| 258 |
</script>
|
| 259 |
|
| 260 |
<div class="container mx-auto px-4 py-8">
|
|
|
|
| 261 |
<header class="text-center mb-8">
|
| 262 |
<h1 class="text-3xl font-bold mb-2">🤗 Infinite Dataset Hub ♾️</h1>
|
| 263 |
<p class="text-lg text-gray-600 dark:text-gray-400">Generate datasets from AI and real-world data sources</p>
|
| 264 |
</header>
|
| 265 |
|
|
|
|
| 266 |
<main>
|
|
|
|
| 267 |
<div id="search-page" class="mb-8">
|
| 268 |
<div class="max-w-3xl mx-auto">
|
| 269 |
<div class="mb-4">
|
|
|
|
| 296 |
</div>
|
| 297 |
</div>
|
| 298 |
|
|
|
|
| 299 |
<div id="engine-modal" class="fixed inset-0 bg-black bg-opacity-50 flex items-center justify-center z-50 hidden">
|
| 300 |
<div class="bg-white dark:bg-gray-800 rounded-lg p-6 max-w-lg w-full max-h-[80vh] overflow-y-auto">
|
| 301 |
<div class="flex justify-between items-center mb-4">
|
|
|
|
| 312 |
</p>
|
| 313 |
|
| 314 |
<div id="engine-options" class="space-y-2 mb-6">
|
| 315 |
+
</div>
|
|
|
|
| 316 |
|
| 317 |
<div class="flex justify-between">
|
| 318 |
<button id="select-all-engines" class="text-primary hover:underline">Select All</button>
|
|
|
|
| 328 |
</div>
|
| 329 |
|
| 330 |
<div id="dataset-results" class="grid grid-cols-1 md:grid-cols-2 gap-4 mt-6">
|
| 331 |
+
</div>
|
|
|
|
| 332 |
|
| 333 |
<div id="load-more-container" class="text-center mt-6 hidden">
|
| 334 |
<button id="load-more-button" class="bg-gray-200 dark:bg-gray-700 px-6 py-3 rounded-lg hover:bg-gray-300 dark:hover:bg-gray-600 transition">
|
|
|
|
| 338 |
</div>
|
| 339 |
</div>
|
| 340 |
|
|
|
|
| 341 |
<div id="dataset-page" class="hidden max-w-4xl mx-auto">
|
| 342 |
<button id="back-button" class="flex items-center text-primary mb-4 hover:underline">
|
| 343 |
<svg xmlns="http://www.w3.org/2000/svg" class="h-5 w-5 mr-1" viewBox="0 0 20 20" fill="currentColor">
|
|
|
|
| 415 |
</div>
|
| 416 |
</main>
|
| 417 |
|
|
|
|
| 418 |
<footer class="mt-12 text-center text-sm text-gray-600 dark:text-gray-400">
|
| 419 |
<p>Powered by Claude-3.7-Sonnet • Datasets generated from real sources and AI</p>
|
| 420 |
</footer>
|
|
|
|
| 476 |
const selectAllEngines = document.getElementById('select-all-engines');
|
| 477 |
const deselectAllEngines = document.getElementById('deselect-all-engines');
|
| 478 |
|
| 479 |
+
// Event Listeners
|
| 480 |
+
document.addEventListener('DOMContentLoaded', () => {
|
| 481 |
+
searchButton.addEventListener('click', performSearch);
|
| 482 |
+
searchInput.addEventListener('keypress', (e) => {
|
| 483 |
+
if (e.key === 'Enter') performSearch();
|
| 484 |
+
});
|
| 485 |
+
loadMoreButton.addEventListener('click', loadMoreDatasets);
|
| 486 |
+
backButton.addEventListener('click', showSearchPage);
|
| 487 |
+
generateFullButton.addEventListener('click', generateFullDataset);
|
| 488 |
+
downloadCsvButton.addEventListener('click', () => downloadData('csv'));
|
| 489 |
+
downloadJsonButton.addEventListener('click', () => downloadData('json'));
|
| 490 |
+
downloadParquetButton.addEventListener('click', () => downloadData('parquet'));
|
| 491 |
+
|
| 492 |
+
dataSourceToggle.addEventListener('change', toggleDataSource);
|
| 493 |
+
engineSettingsButton.addEventListener('click', showEngineModal);
|
| 494 |
+
closeModalButton.addEventListener('click', hideEngineModal);
|
| 495 |
+
saveEnginesButton.addEventListener('click', saveEngineSettings);
|
| 496 |
+
selectAllEngines.addEventListener('click', () => toggleAllEngines(true));
|
| 497 |
+
deselectAllEngines.addEventListener('click', () => toggleAllEngines(false));
|
| 498 |
+
|
| 499 |
+
// Initialize engine options
|
| 500 |
+
populateEngineOptions();
|
| 501 |
+
|
| 502 |
+
// Show initial placeholder datasets
|
| 503 |
+
showPlaceholderDatasets();
|
| 504 |
+
});
|
| 505 |
+
|
| 506 |
+
// Search Engine Settings
|
| 507 |
+
function populateEngineOptions() {
|
| 508 |
+
engineOptions.innerHTML = '';
|
| 509 |
+
|
| 510 |
+
searchEngines.forEach(engine => {
|
| 511 |
+
const isChecked = selectedEngines.includes(engine);
|
| 512 |
+
|
| 513 |
+
const optionDiv = document.createElement('div');
|
| 514 |
+
optionDiv.className = 'flex items-center';
|
| 515 |
+
|
| 516 |
+
optionDiv.innerHTML = `
|
| 517 |
+
<input type="checkbox" id="engine-${engine}" class="engine-checkbox mr-2 h-4 w-4"
|
| 518 |
+
value="${engine}" ${isChecked ? 'checked' : ''}>
|
| 519 |
+
<label for="engine-${engine}" class="cursor-pointer">${engine}</label>
|
| 520 |
+
`;
|
| 521 |
+
|
| 522 |
+
engineOptions.appendChild(optionDiv);
|
| 523 |
+
});
|
| 524 |
+
}
|
| 525 |
+
|
| 526 |
+
function showEngineModal() {
|
| 527 |
+
engineModal.classList.remove('hidden');
|
| 528 |
+
}
|
| 529 |
+
|
| 530 |
+
function hideEngineModal() {
|
| 531 |
+
engineModal.classList.add('hidden');
|
| 532 |
+
}
|
| 533 |
+
|
| 534 |
+
function saveEngineSettings() {
|
| 535 |
+
const checkboxes = document.querySelectorAll('.engine-checkbox:checked');
|
| 536 |
+
selectedEngines = Array.from(checkboxes).map(cb => cb.value);
|
| 537 |
+
|
| 538 |
+
if (selectedEngines.length === 0) {
|
| 539 |
+
// Ensure at least one engine is selected
|
| 540 |
+
selectedEngines = ["DuckDuckGo.com"];
|
| 541 |
+
document.getElementById(`engine-DuckDuckGo.com`).checked = true;
|
| 542 |
+
showNotification("At least one search engine must be selected. Using DuckDuckGo as default.");
|
| 543 |
+
}
|
| 544 |
+
|
| 545 |
+
hideEngineModal();
|
| 546 |
+
showNotification(`Updated search engine settings. Using ${selectedEngines.length} engines.`);
|
| 547 |
+
}
|
| 548 |
+
|
| 549 |
+
function toggleAllEngines(select) {
|
| 550 |
+
const checkboxes = document.querySelectorAll('.engine-checkbox');
|
| 551 |
+
checkboxes.forEach(cb => {
|
| 552 |
+
cb.checked = select;
|
| 553 |
+
});
|
| 554 |
+
}
|
| 555 |
+
|
| 556 |
+
// Toggle data source between real and AI
|
| 557 |
+
function toggleDataSource() {
|
| 558 |
+
useRealData = dataSourceToggle.checked;
|
| 559 |
+
dataSourceText.textContent = useRealData ? "Using: Real + AI Data" : "Using: AI Data Only";
|
| 560 |
+
|
| 561 |
+
// Show or hide engine settings button
|
| 562 |
+
engineSettingsButton.style.display = useRealData ? "flex" : "none";
|
| 563 |
+
|
| 564 |
+
showNotification(`Switched to ${useRealData ? "combined real and synthetic" : "synthetic-only"} data mode`);
|
| 565 |
+
}
|
| 566 |
+
|
| 567 |
+
// Search functionality
|
| 568 |
+
function performSearch() {
|
| 569 |
+
const query = searchInput.value.trim();
|
| 570 |
+
if (!query) return;
|
| 571 |
+
|
| 572 |
+
currentSearchQuery = query;
|
| 573 |
+
currentPage = 1;
|
| 574 |
+
currentDatasets = [];
|
| 575 |
+
|
| 576 |
+
resultsContainer.innerHTML = '';
|
| 577 |
+
showLoadingSkeletons();
|
| 578 |
+
|
| 579 |
+
if (useRealData) {
|
| 580 |
+
// Use real data from search engines + AI
|
| 581 |
+
searchWithRealData(query);
|
| 582 |
+
} else {
|
| 583 |
+
// Use only AI-generated data
|
| 584 |
+
searchWithAIData(query);
|
| 585 |
+
}
|
| 586 |
+
}
|
| 587 |
+
|
| 588 |
+
function searchWithRealData(query) {
|
| 589 |
+
// Randomly select a search engine from the user's selected engines
|
| 590 |
+
currentEngine = selectedEngines[Math.floor(Math.random() * selectedEngines.length)];
|
| 591 |
+
|
| 592 |
+
// Register handler for dataset names based on real search results
|
| 593 |
+
window.Poe.registerHandler("real-search-handler", (result) => {
|
| 594 |
+
if (result.status === "error") {
|
| 595 |
+
showError("Error querying search engines");
|
| 596 |
+
return;
|
| 597 |
+
}
|
| 598 |
+
|
| 599 |
+
const message = result.responses[0];
|
| 600 |
+
|
| 601 |
+
if (message.status === "complete") {
|
| 602 |
+
// Parse the dataset names and tags from the response
|
| 603 |
+
const datasets = parseDatasetResults(message.content);
|
| 604 |
+
datasets.forEach(dataset => {
|
| 605 |
+
dataset.isReal = true;
|
| 606 |
+
dataset.engine = currentEngine;
|
| 607 |
+
});
|
| 608 |
+
|
| 609 |
+
currentDatasets = datasets;
|
| 610 |
+
|
| 611 |
+
// Display the datasets
|
| 612 |
+
resultsContainer.innerHTML = '';
|
| 613 |
+
displayDatasets(datasets);
|
| 614 |
+
|
| 615 |
+
// Show load more button if we have results
|
| 616 |
+
if (datasets.length > 0) {
|
| 617 |
+
loadMoreContainer.classList.remove('hidden');
|
| 618 |
+
}
|
| 619 |
+
}
|
| 620 |
+
});
|
| 621 |
+
|
| 622 |
+
try {
|
| 623 |
+
window.Poe.sendUserMessage(
|
| 624 |
+
`@Claude-3.7-Sonnet You are a data specialist who can transform real search results into structured datasets.
|
| 625 |
+
|
| 626 |
+
A user is searching for data about: "${query}"
|
| 627 |
+
|
| 628 |
+
Imagine you've queried ${currentEngine} and received real search results. Create a list of 10 specific datasets that could be created from these search results.
|
| 629 |
+
|
| 630 |
+
For each dataset:
|
| 631 |
+
1. Give it a clear, specific name related to the search topic
|
| 632 |
+
2. Include 3-5 relevant tags in parentheses, with one tag specifying the ML task type (classification, regression, clustering, etc.)
|
| 633 |
+
|
| 634 |
+
Format each dataset as:
|
| 635 |
+
1. DatasetName (tag1, tag2, ml_task_tag)
|
| 636 |
+
|
| 637 |
+
Make these datasets sound like real collections that could be created from ${currentEngine} search results on "${query}".`,
|
| 638 |
+
{
|
| 639 |
+
handler: "real-search-handler",
|
| 640 |
+
stream: false,
|
| 641 |
+
openChat: false
|
| 642 |
+
}
|
| 643 |
+
);
|
| 644 |
+
} catch (err) {
|
| 645 |
+
showError("Error sending message: " + err);
|
| 646 |
+
// Fall back to AI data
|
| 647 |
+
searchWithAIData(query);
|
| 648 |
+
}
|
| 649 |
+
}
|
| 650 |
+
|
| 651 |
+
function searchWithAIData(query) {
|
| 652 |
+
// Register handler for AI-generated dataset names
|
| 653 |
+
window.Poe.registerHandler("dataset-search-handler", (result) => {
|
| 654 |
+
if (result.status === "error") {
|
| 655 |
+
showError("Error generating datasets");
|
| 656 |
+
return;
|
| 657 |
+
}
|
| 658 |
+
|
| 659 |
+
const message = result.responses[0];
|
| 660 |
+
|
| 661 |
+
if (message.status === "complete") {
|
| 662 |
+
// Parse the dataset names and tags from the response
|
| 663 |
+
const datasets = parseDatasetResults(message.content);
|
| 664 |
+
datasets.forEach(dataset => {
|
| 665 |
+
dataset.isReal = false;
|
| 666 |
+
});
|
| 667 |
+
|
| 668 |
+
currentDatasets = datasets;
|
| 669 |
+
|
| 670 |
+
// Display the datasets
|
| 671 |
+
resultsContainer.innerHTML = '';
|
| 672 |
+
displayDatasets(datasets);
|
| 673 |
+
|
| 674 |
+
// Show load more button if we have results
|
| 675 |
+
if (datasets.length > 0) {
|
| 676 |
+
loadMoreContainer.classList.remove('hidden');
|
| 677 |
+
}
|
| 678 |
+
}
|
| 679 |
+
});
|
| 680 |
+
|
| 681 |
+
try {
|
| 682 |
+
window.Poe.sendUserMessage(
|
| 683 |
+
`@Claude-3.7-Sonnet A Machine Learning Practioner is looking for a dataset that matches '${query}'.
|
| 684 |
+
Generate a list of ${MAX_DATASETS_PER_PAGE} names of quality datasets that don't exist but sound plausible and would
|
| 685 |
+
be helpful. Feel free to reuse words from the query '${query}' to name the datasets.
|
| 686 |
+
Every dataset should be about '${query}' and have descriptive tags/keywords including the ML task name associated with the dataset (classification, regression, anomaly detection, etc.). Use the following format:
|
| 687 |
+
1. DatasetName1 (tag1, tag2, tag3)
|
| 688 |
+
2. DatasetName2 (tag1, tag2, tag3)`,
|
| 689 |
+
{
|
| 690 |
+
handler: "dataset-search-handler",
|
| 691 |
+
stream: false,
|
| 692 |
+
openChat: false
|
| 693 |
+
}
|
| 694 |
+
);
|
| 695 |
+
} catch (err) {
|
| 696 |
+
showError("Error sending message: " + err);
|
| 697 |
+
}
|
| 698 |
+
}
|
| 699 |
+
|
| 700 |
+
function loadMoreDatasets() {
|
| 701 |
+
currentPage++;
|
| 702 |
+
|
| 703 |
+
// Use the same data source (real or AI) as the initial search
|
| 704 |
+
if (useRealData) {
|
| 705 |
+
loadMoreRealDatasets();
|
| 706 |
+
} else {
|
| 707 |
+
loadMoreAIDatasets();
|
| 708 |
+
}
|
| 709 |
+
}
|
| 710 |
+
|
| 711 |
+
function loadMoreRealDatasets() {
|
| 712 |
+
// Rotate to a different search engine for variety
|
| 713 |
+
const previousEngine = currentEngine;
|
| 714 |
+
while (currentEngine === previousEngine && selectedEngines.length > 1) {
|
| 715 |
+
currentEngine = selectedEngines[Math.floor(Math.random() * selectedEngines.length)];
|
| 716 |
+
}
|
| 717 |
+
|
| 718 |
+
// Register handler for more datasets
|
| 719 |
+
window.Poe.registerHandler("more-real-datasets-handler", (result) => {
|
| 720 |
+
if (result.status === "error") {
|
| 721 |
+
showError("Error generating more datasets");
|
| 722 |
+
return;
|
| 723 |
+
}
|
| 724 |
+
|
| 725 |
+
const message = result.responses[0];
|
| 726 |
+
|
| 727 |
+
if (message.status === "complete") {
|
| 728 |
+
// Parse the dataset names and tags from the response
|
| 729 |
+
const datasets = parseDatasetResults(message.content);
|
| 730 |
+
datasets.forEach(dataset => {
|
| 731 |
+
dataset.isReal = true;
|
| 732 |
+
dataset.engine = currentEngine;
|
| 733 |
+
});
|
| 734 |
+
|
| 735 |
+
currentDatasets = [...currentDatasets, ...datasets];
|
| 736 |
+
|
| 737 |
+
// Display the datasets
|
| 738 |
+
displayDatasets(datasets);
|
| 739 |
+
}
|
| 740 |
+
});
|
| 741 |
+
|
| 742 |
+
try {
|
| 743 |
+
window.Poe.sendUserMessage(
|
| 744 |
+
`@Claude-3.7-Sonnet You're a data specialist who can transform real search results into structured datasets.
|
| 745 |
+
|
| 746 |
+
Continue our previous search for data about: "${currentSearchQuery}"
|
| 747 |
+
|
| 748 |
+
Now let's use a different search engine: ${currentEngine}
|
| 749 |
+
|
| 750 |
+
Create 10 more specific datasets that could be created from these search results. Make sure these are different from the previous datasets.
|
| 751 |
+
|
| 752 |
+
Use the same format:
|
| 753 |
+
1. DatasetName (tag1, tag2, ml_task_tag)
|
| 754 |
+
|
| 755 |
+
Make these datasets sound like real collections that could be created from ${currentEngine} search results on "${currentSearchQuery}".`,
|
| 756 |
+
{
|
| 757 |
+
handler: "more-real-datasets-handler",
|
| 758 |
+
stream: false,
|
| 759 |
+
openChat: false
|
| 760 |
+
}
|
| 761 |
+
);
|
| 762 |
+
} catch (err) {
|
| 763 |
+
showError("Error sending message: " + err);
|
| 764 |
+
// Fall back to AI data
|
| 765 |
+
loadMoreAIDatasets();
|
| 766 |
+
}
|
| 767 |
+
}
|
| 768 |
+
|
| 769 |
+
// Ask the bot for another page of AI-generated dataset suggestions for the
// current search query and append them to the result list.
//
// Reads: MAX_DATASETS_PER_PAGE, currentSearchQuery, currentDatasets.
// Writes: currentDatasets (extended with the newly parsed entries).
// Failures are reported through showError(); nothing is returned.
function loadMoreAIDatasets() {
    const handlerName = "more-datasets-handler";

    // Invoked by Poe with the bot reply for the request sent below.
    const onSuggestions = (result) => {
        if (result.status === "error") {
            showError("Error generating more datasets");
            return;
        }

        const reply = result.responses[0];
        if (reply.status !== "complete") {
            return;
        }

        // Parse the "N. Name (tag, tag, ...)" lines and flag them as synthetic.
        const parsed = parseDatasetResults(reply.content);
        for (const entry of parsed) {
            entry.isReal = false;
        }

        currentDatasets = currentDatasets.concat(parsed);

        // Only the new entries are handed to the renderer.
        displayDatasets(parsed);
    };

    window.Poe.registerHandler(handlerName, onSuggestions);

    const requestOptions = {
        handler: handlerName,
        stream: false,
        openChat: false
    };

    try {
        window.Poe.sendUserMessage(
            `@Claude-3.7-Sonnet Please generate ${MAX_DATASETS_PER_PAGE} more dataset names about '${currentSearchQuery}'. Use the same format as before:
1. DatasetName1 (tag1, tag2, tag3)
Make sure these are completely different from previous suggestions.`,
            requestOptions
        );
    } catch (err) {
        showError("Error sending message: " + err);
    }
}
|
| 808 |
+
|
| 809 |
+
// Switch from the search page to the detail page for `dataset` and kick off
// generation of its preview.
//
// dataset: object with `name`, `tags` (array of strings), `isReal` and, for
//          real-data entries, `engine`.
//
// Side effects: sets currentDataset, resets fullDatasetRows and the
// full-dataset UI, and calls generateRealDatasetPreview() or
// generateAIDatasetPreview() depending on `dataset.isReal`.
function showDatasetDetails(dataset) {
    currentDataset = dataset;
    searchPage.classList.add('hidden');
    datasetPage.classList.remove('hidden');

    // Update UI with dataset info
    datasetTitle.textContent = dataset.name;

    // Tags originate from model output, so build the chips as DOM nodes and
    // assign their text via textContent instead of concatenating HTML.
    datasetTags.innerHTML = '';
    (dataset.tags || []).forEach(tag => {
        const chip = document.createElement('span');
        chip.className = 'inline-block bg-gray-100 dark:bg-gray-700 text-gray-800 dark:text-gray-300 text-xs px-2 py-1 rounded mr-1 mb-1';
        chip.textContent = tag;
        datasetTags.appendChild(chip);
    });

    // Wrap dynamic text in <strong> without ever parsing it as markup.
    const emphasize = (text) => {
        const strong = document.createElement('strong');
        strong.textContent = text;
        return strong;
    };

    // Update source badge and the explanatory sentence below it. The search
    // query is user input, so it is inserted as text, never as HTML.
    sourceDetails.textContent = '';
    if (dataset.isReal) {
        dataSourceBadge.textContent = "Real Data";
        dataSourceBadge.className = "px-3 py-1 rounded-full text-xs font-medium bg-green-100 text-green-800 dark:bg-green-900 dark:text-green-200";
        sourceDetails.append(
            'This dataset is based on real information queried from ',
            emphasize(dataset.engine),
            ' for the search term "',
            emphasize(currentSearchQuery),
            '". The data has been structured for machine learning use.'
        );
    } else {
        dataSourceBadge.textContent = "AI-Generated";
        dataSourceBadge.className = "px-3 py-1 rounded-full text-xs font-medium bg-purple-100 text-purple-800 dark:bg-purple-900 dark:text-purple-200";
        sourceDetails.textContent = `This is an AI-generated dataset created using Claude-3.7-Sonnet. The content is synthetic and designed to represent plausible data related to "${currentSearchQuery}".`;
    }

    // Clear previous content; the shimmer bars act as a loading placeholder
    // until the preview handler replaces them.
    datasetDescription.innerHTML = '<div class="shimmer h-4 w-full mb-2"></div>'.repeat(3);
    previewTable.innerHTML = '';
    fullDatasetSection.classList.add('hidden');
    generateStatus.classList.add('hidden');
    generateFullButton.disabled = false;

    // Reset full dataset
    fullDatasetRows = [];

    // Generate dataset preview - different approach for real vs AI data
    if (dataset.isReal) {
        generateRealDatasetPreview(dataset);
    } else {
        generateAIDatasetPreview(dataset);
    }

    // Scroll to top
    window.scrollTo(0, 0);
}
|
| 851 |
+
|
| 852 |
+
// Request and render the preview (description + 5-row CSV table) of a dataset
// that is presented as derived from search-engine results.
//
// dataset: object providing `name`, `tags` (array of strings) and `engine`.
//
// The reply is expected to contain a "**Dataset Description:**" section
// followed by a "**CSV Content Preview:**" section holding a fenced CSV block.
// The description is rendered into datasetDescription, the CSV into
// previewTable; problems are shown in place of the respective content.
function generateRealDatasetPreview(dataset) {
    const handlerName = "real-preview-handler";

    // Prefer the body of the first fenced code block so that commentary after
    // the closing fence is not parsed as CSV rows; when no complete fence is
    // present, fall back to stripping stray fence markers.
    const extractCsv = (text) => {
        const fenced = text.match(/```(?:csv)?\n([\s\S]*?)```/);
        if (fenced) {
            return fenced[1].trim();
        }
        return text.replace(/```csv\n|```\n|```/g, '').trim();
    };

    // Replace `container`'s content with a message paragraph. The text is set
    // through textContent so dynamic error text is never interpreted as HTML.
    const showProblem = (container, className, text) => {
        const paragraph = document.createElement('p');
        paragraph.className = className;
        paragraph.textContent = text;
        container.innerHTML = '';
        container.appendChild(paragraph);
    };

    window.Poe.registerHandler(handlerName, (result) => {
        const message = result.responses && result.responses[0];
        if (result.status === "error" || !message) {
            showProblem(datasetDescription, 'text-red-500', 'Error generating dataset preview');
            return;
        }

        if (message.status !== "complete") {
            return;
        }

        const content = message.content;

        // Extract description and CSV
        const parts = content.split('**CSV Content Preview:**');
        let description;
        let csvContent;

        if (parts.length > 1) {
            description = parts[0].replace('**Dataset Description:**', '').trim();
            csvContent = extractCsv(parts[1]);
        } else {
            // No heading marker: still unwrap a fenced block if there is one
            // instead of feeding the fence line to the CSV parser as a header.
            description = "No description available";
            csvContent = extractCsv(content);
        }

        // Display description
        // NOTE(review): the Markdown comes from model output and is inserted as
        // HTML as-is; consider sanitizing it if the reply cannot be trusted.
        datasetDescription.innerHTML = marked.parse(description);

        // Parse and display CSV preview
        try {
            const results = Papa.parse(csvContent, {
                header: true,
                skipEmptyLines: true
            });

            if (results.data && results.data.length > 0) {
                createTable(previewTable, results.data, results.meta.fields);
            } else {
                showProblem(previewTable, 'p-4 text-red-500', 'No preview data available');
            }
        } catch (err) {
            showProblem(previewTable, 'p-4 text-red-500', `Error parsing CSV: ${err.message}`);
        }
    });

    try {
        const tagsStr = dataset.tags.join(', ');
        window.Poe.sendUserMessage(
            `@Claude-3.7-Sonnet You're a specialist in converting web search results into structured data.

Based on search results from ${dataset.engine} about "${currentSearchQuery}",
create a preview of the dataset "${dataset.name}" with tags "${tagsStr}".

First, write a detailed description of what this dataset contains, its structure, and how it was constructed from web search results.

Then, generate a realistic 5-row CSV preview that resembles data you might get if you scraped and structured real results from ${dataset.engine}.

Format your response with:
**Dataset Description:** [detailed description]

**CSV Content Preview:**
\`\`\`csv
[CSV header and 5 rows of realistic data]
\`\`\`

Include relevant columns for the dataset type, with proper labels/categories where appropriate. The data should look like it came from real sources.`,
            {
                handler: handlerName,
                stream: false,
                openChat: false
            }
        );
    } catch (err) {
        showProblem(datasetDescription, 'text-red-500', `Error: ${err.message}`);
    }
}
|
| 933 |
+
|
| 934 |
+
// Request and render the preview (description + 5-row CSV table) of a purely
// AI-generated dataset.
//
// dataset: object providing `name` and `tags` (array of strings).
//
// The reply is expected to contain a "**Dataset Description:**" section
// followed by a "**CSV Content Preview:**" section holding a fenced CSV block.
// The description is rendered into datasetDescription, the CSV into
// previewTable; problems are shown in place of the respective content.
function generateAIDatasetPreview(dataset) {
    const handlerName = "dataset-preview-handler";

    // Prefer the body of the first fenced code block so that commentary after
    // the closing fence is not parsed as CSV rows; when no complete fence is
    // present, fall back to stripping stray fence markers.
    const extractCsv = (text) => {
        const fenced = text.match(/```(?:csv)?\n([\s\S]*?)```/);
        if (fenced) {
            return fenced[1].trim();
        }
        return text.replace(/```csv\n|```\n|```/g, '').trim();
    };

    // Replace `container`'s content with a message paragraph. The text is set
    // through textContent so dynamic error text is never interpreted as HTML.
    const showProblem = (container, className, text) => {
        const paragraph = document.createElement('p');
        paragraph.className = className;
        paragraph.textContent = text;
        container.innerHTML = '';
        container.appendChild(paragraph);
    };

    window.Poe.registerHandler(handlerName, (result) => {
        const message = result.responses && result.responses[0];
        if (result.status === "error" || !message) {
            showProblem(datasetDescription, 'text-red-500', 'Error generating dataset preview');
            return;
        }

        if (message.status !== "complete") {
            return;
        }

        const content = message.content;

        // Extract description and CSV
        const parts = content.split('**CSV Content Preview:**');
        let description;
        let csvContent;

        if (parts.length > 1) {
            description = parts[0].replace('**Dataset Description:**', '').trim();
            csvContent = extractCsv(parts[1]);
        } else {
            // No heading marker: still unwrap a fenced block if there is one
            // instead of feeding the fence line to the CSV parser as a header.
            description = "No description available";
            csvContent = extractCsv(content);
        }

        // Display description
        // NOTE(review): the Markdown comes from model output and is inserted as
        // HTML as-is; consider sanitizing it if the reply cannot be trusted.
        datasetDescription.innerHTML = marked.parse(description);

        // Parse and display CSV preview
        try {
            const results = Papa.parse(csvContent, {
                header: true,
                skipEmptyLines: true
            });

            if (results.data && results.data.length > 0) {
                createTable(previewTable, results.data, results.meta.fields);
            } else {
                showProblem(previewTable, 'p-4 text-red-500', 'No preview data available');
            }
        } catch (err) {
            showProblem(previewTable, 'p-4 text-red-500', `Error parsing CSV: ${err.message}`);
        }
    });

    try {
        const tagsStr = dataset.tags.join(', ');
        window.Poe.sendUserMessage(
            `@Claude-3.7-Sonnet An ML practitioner is looking for a dataset CSV after the query '${currentSearchQuery}'.
Generate the first 5 rows of a plausible and quality CSV for the dataset '${dataset.name}'.
You can get inspiration from related keywords '${tagsStr}' but most importantly the dataset should correspond to the query '${currentSearchQuery}'.
Focus on quality text content and use a 'label' or 'labels' column if it makes sense (invent labels, avoid reusing the keywords, be accurate while labelling texts).
Reply using a short description of the dataset with title **Dataset Description:** followed by the CSV content in a code block and with title **CSV Content Preview:**`,
            {
                handler: handlerName,
                stream: false,
                openChat: false
            }
        );
    } catch (err) {
        showProblem(datasetDescription, 'text-red-500', `Error: ${err.message}`);
    }
}
|
| 1002 |
+
|
| 1003 |
+
// Replace the contents of `container` with an HTML table.
//
// container: element that receives the table (its previous content is removed).
// data:      array of row objects keyed by column name.
// headers:   array of column names; defines both the header cells and the
//            order in which each row's values are read.
//
// Cell text is assigned through textContent; falsy values render as ''.
function createTable(container, data, headers) {
    container.innerHTML = '';

    // Build one <tr> whose cells are `cellTag` elements holding `values`.
    const buildRow = (cellTag, values) => {
        const rowEl = document.createElement('tr');
        for (const value of values) {
            const cell = document.createElement(cellTag);
            cell.textContent = value;
            rowEl.appendChild(cell);
        }
        return rowEl;
    };

    const tableEl = document.createElement('table');
    tableEl.className = 'w-full';

    // Header section: a single row of <th> cells.
    const headEl = document.createElement('thead');
    headEl.appendChild(buildRow('th', headers));
    tableEl.appendChild(headEl);

    // Body section: one row of <td> cells per record.
    const bodyEl = document.createElement('tbody');
    for (const record of data) {
        const values = headers.map(column => record[column] || '');
        bodyEl.appendChild(buildRow('td', values));
    }
    tableEl.appendChild(bodyEl);

    container.appendChild(tableEl);
}
|
| 1040 |
+
|
| 1041 |
+
// Start generating the full dataset for currentDataset.
//
// The column names and the first rows are read back from the preview table;
// further rows are requested in batches by generateFullRealDataset() or
// generateFullAIDataset(), depending on currentDataset.isReal.
//
// Side effects: disables the generate button, shows the progress status and
// resets fullDatasetRows to the preview rows.
function generateFullDataset() {
    // Get the column names from the preview table
    const previewHeaders = Array.from(previewTable.querySelectorAll('thead th')).map(th => th.textContent);

    // Without a rendered preview there is no header to give the model, so the
    // batches could not be merged into consistent rows. Bail out before
    // touching the UI state.
    if (previewHeaders.length === 0) {
        showError("Dataset preview is not available yet - cannot generate the full dataset");
        return;
    }

    // Disable button and show status
    generateFullButton.disabled = true;
    generateStatus.classList.remove('hidden');
    rowsCount.textContent = '0';
    progressBar.style.width = '0%';

    const targetRows = MAX_FULL_DATASET_ROWS;

    // Serialize the header as a real CSV line so that column names containing
    // commas or quotes are quoted instead of silently splitting into columns.
    const csvHeader = Papa.unparse([previewHeaders]);

    // Add initial rows from preview
    const previewRows = Array.from(previewTable.querySelectorAll('tbody tr')).map(tr => {
        const row = {};
        Array.from(tr.querySelectorAll('td')).forEach((td, index) => {
            row[previewHeaders[index]] = td.textContent;
        });
        return row;
    });

    fullDatasetRows = [...previewRows];
    const currentRows = previewRows.length;
    updateGenerationProgress(currentRows, targetRows);

    // Choose generation method based on dataset type
    if (currentDataset.isReal) {
        generateFullRealDataset(previewHeaders, csvHeader, currentRows, targetRows);
    } else {
        generateFullAIDataset(previewHeaders, csvHeader, currentRows, targetRows);
    }
}
|
| 1078 |
+
|
| 1079 |
+
// Grow fullDatasetRows up to `targetRows` by requesting batches of rows that
// are presented as coming from the selected search engines.
//
// previewHeaders: column names of the preview table (kept for interface
//                 compatibility; not read here).
// csvHeader:      CSV header line the model must reuse.
// currentRows:    number of rows already collected (the preview rows).
// targetRows:     number of rows at which generation stops.
//
// When the target is reached showFullDataset() is called. A batch that yields
// no usable rows is retried a bounded number of times; after that, or on a bot
// error, generation stops and the rows collected so far are shown.
function generateFullRealDataset(previewHeaders, csvHeader, currentRows, targetRows) {
    const batchSize = 15; // Larger batches for efficiency
    const maxAttemptsPerBatch = 3; // Bound retries so a bad reply cannot loop forever

    // Prefer the body of the first fenced code block; when the reply has no
    // complete fence, fall back to stripping stray fence markers.
    const extractCsv = (text) => {
        const fenced = text.match(/```(?:csv)?\n([\s\S]*?)```/);
        if (fenced) {
            return fenced[1].trim();
        }
        return text.replace(/```csv\n|```\n|```/g, '').trim();
    };

    // Stop generating: report the problem, let the user start over, and show
    // whatever has been collected instead of leaving the progress bar stuck.
    const giveUp = () => {
        showError("Error generating dataset rows");
        generateFullButton.disabled = false;
        if (fullDatasetRows.length > 0) {
            showFullDataset();
        } else {
            generateStatus.classList.add('hidden');
        }
    };

    const generateBatch = (batchIndex, attempt = 0) => {
        // `currentRows` already accumulates every received row, so it is
        // compared directly; adding batchIndex * batchSize on top would count
        // the rows twice and stop well short of the target.
        if (currentRows >= targetRows) {
            showFullDataset();
            return;
        }

        if (attempt >= maxAttemptsPerBatch) {
            giveUp();
            return;
        }

        const handlerName = `real-batch-${batchIndex}-handler`;
        const rowsWanted = Math.min(batchSize, targetRows - currentRows);

        window.Poe.registerHandler(handlerName, (result) => {
            if (result.status === "error") {
                giveUp();
                return;
            }

            const message = result.responses[0];
            if (message.status !== "complete") {
                return;
            }

            let rows = [];
            try {
                const results = Papa.parse(extractCsv(message.content), {
                    header: true,
                    skipEmptyLines: true
                });
                rows = results.data || [];
            } catch (err) {
                console.error("Error parsing CSV:", err);
            }

            if (rows.length === 0) {
                // Nothing usable in this reply: ask again, counting the attempt.
                generateBatch(batchIndex, attempt + 1);
                return;
            }

            // Add the new rows and move on to the next batch
            fullDatasetRows = [...fullDatasetRows, ...rows];
            currentRows += rows.length;
            updateGenerationProgress(currentRows, targetRows);
            generateBatch(batchIndex + 1);
        });

        try {
            // For variation, rotate through engines for each batch
            const engineForBatch = selectedEngines[batchIndex % selectedEngines.length] || currentDataset.engine;

            window.Poe.sendUserMessage(
                `@Claude-3.7-Sonnet You're expanding a dataset based on search results from ${engineForBatch}.

For the dataset "${currentDataset.name}" about "${currentSearchQuery}", please generate ${rowsWanted} more rows of data.

Use this exact CSV header: ${csvHeader}

The data should look realistic, as if it came from actual ${engineForBatch} search results for "${currentSearchQuery}".
Include appropriate values for each field, maintaining the same patterns and types as seen in the existing data.

Only include the CSV data in your response (header + ${rowsWanted} rows), no explanations or additional text.`,
                {
                    handler: handlerName,
                    stream: false,
                    openChat: false
                }
            );
        } catch (err) {
            showError("Error sending message: " + err);
            // Sending failed, so no rows arrived; retry as a counted attempt
            // so that a permanently failing send cannot recurse without end.
            generateBatch(batchIndex, attempt + 1);
        }
    };

    // Start generating batches
    generateBatch(0);
}
|
| 1173 |
+
|
| 1174 |
+
// Grow fullDatasetRows up to `targetRows` by requesting batches of
// AI-generated rows.
//
// previewHeaders: column names of the preview table (kept for interface
//                 compatibility; not read here).
// csvHeader:      CSV header line the model must reuse.
// currentRows:    number of rows already collected (the preview rows).
// targetRows:     number of rows at which generation stops.
//
// When the target is reached showFullDataset() is called. A batch that yields
// no usable rows is retried a bounded number of times; after that, or on a
// bot/send error, generation stops and the rows collected so far are shown.
function generateFullAIDataset(previewHeaders, csvHeader, currentRows, targetRows) {
    const batchSize = 10;
    const maxAttemptsPerBatch = 3; // Bound retries so a bad reply cannot loop forever

    // Prefer the body of the first fenced code block; when the reply has no
    // complete fence, fall back to stripping stray fence markers.
    const extractCsv = (text) => {
        const fenced = text.match(/```(?:csv)?\n([\s\S]*?)```/);
        if (fenced) {
            return fenced[1].trim();
        }
        return text.replace(/```csv\n|```\n|```/g, '').trim();
    };

    // Stop generating: let the user start over and show whatever has been
    // collected instead of leaving the progress bar stuck.
    const stopGeneration = () => {
        generateFullButton.disabled = false;
        if (fullDatasetRows.length > 0) {
            showFullDataset();
        } else {
            generateStatus.classList.add('hidden');
        }
    };

    const generateBatch = (batchIndex, attempt = 0) => {
        // `currentRows` already accumulates every received row, so it is
        // compared directly; adding batchIndex * batchSize on top would count
        // the rows twice and stop well short of the target.
        if (currentRows >= targetRows) {
            showFullDataset();
            return;
        }

        if (attempt >= maxAttemptsPerBatch) {
            showError("Error generating dataset rows");
            stopGeneration();
            return;
        }

        const handlerName = `batch-${batchIndex}-handler`;
        const rowsWanted = Math.min(batchSize, targetRows - currentRows);

        window.Poe.registerHandler(handlerName, (result) => {
            if (result.status === "error") {
                showError("Error generating dataset rows");
                stopGeneration();
                return;
            }

            const message = result.responses[0];
            if (message.status !== "complete") {
                return;
            }

            let rows = [];
            try {
                const results = Papa.parse(extractCsv(message.content), {
                    header: true,
                    skipEmptyLines: true
                });
                rows = results.data || [];
            } catch (err) {
                console.error("Error parsing CSV:", err);
            }

            if (rows.length === 0) {
                // Nothing usable in this reply: ask again, counting the attempt.
                generateBatch(batchIndex, attempt + 1);
                return;
            }

            // Add the new rows and move on to the next batch
            fullDatasetRows = [...fullDatasetRows, ...rows];
            currentRows += rows.length;
            updateGenerationProgress(currentRows, targetRows);
            generateBatch(batchIndex + 1);
        });

        try {
            const tagsStr = currentDataset.tags.join(', ');
            window.Poe.sendUserMessage(
                `@Claude-3.7-Sonnet For the dataset '${currentDataset.name}' about '${currentSearchQuery}' with tags '${tagsStr}',
please generate ${rowsWanted} more sample rows in CSV format. Use the same CSV header: ${csvHeader}
Only include the CSV data in your response, no explanations or additional text.`,
                {
                    handler: handlerName,
                    stream: false,
                    openChat: false
                }
            );
        } catch (err) {
            showError("Error sending message: " + err);
            // No reply will arrive for this batch; release the UI rather than
            // leaving the button disabled and the status visible.
            stopGeneration();
        }
    };

    // Start generating batches
    generateBatch(0);
}
|
| 1257 |
+
|
| 1258 |
+
// Reflect generation progress in the UI.
//
// current: number of rows collected so far (shown verbatim in rowsCount).
// total:   target number of rows; the bar width is current/total as a whole
//          percentage, capped at 100.
function updateGenerationProgress(current, total) {
    rowsCount.textContent = current;

    const rawPercent = Math.floor((current / total) * 100);
    const cappedPercent = Math.min(100, rawPercent);
    progressBar.style.width = cappedPercent + '%';
}
|
| 1263 |
+
|
| 1264 |
+
// Reveal the full-dataset section and render the first rows of
// fullDatasetRows into fullTable, followed by a note stating how many rows
// are shown out of the total.
function showFullDataset() {
    const displayLimit = 10; // Only a short excerpt is rendered; downloads carry everything

    // Hide generation status
    generateStatus.classList.add('hidden');

    // Show full dataset section
    fullDatasetSection.classList.remove('hidden');

    // Column names are taken from the first row (empty when there are no rows)
    const headers = Object.keys(fullDatasetRows[0] || {});

    // Create and display the table
    const visibleRows = fullDatasetRows.slice(0, displayLimit);
    createTable(fullTable, visibleRows, headers);

    // Add a note about showing limited rows. Report the number of rows that
    // are really displayed, which is below the limit for small datasets.
    const note = document.createElement('p');
    note.className = 'text-sm text-gray-600 dark:text-gray-400 mt-2';
    note.textContent = `Showing ${visibleRows.length} of ${fullDatasetRows.length} rows. Use the download buttons to get the complete dataset.`;
    fullTable.appendChild(note);
}
|
| 1283 |
|
| 1284 |
+
// Download fullDatasetRows in the requested format.
//
// format: 'csv', 'json' or 'parquet'. Any other value does nothing.
//
// The file name is the dataset name with whitespace runs replaced by '_' and
// a '_dataset' suffix. 'parquet' is not really produced: a notification says
// so and a JSON file with a '_parquet_simulated' suffix is downloaded instead.
// Nothing happens while fullDatasetRows is empty.
function downloadData(format) {
    if (fullDatasetRows.length === 0) {
        return;
    }

    const baseName = `${currentDataset.name.replace(/\s+/g, '_')}_dataset`;

    if (format === 'csv') {
        downloadCsv(baseName);
    } else if (format === 'json') {
        downloadJson(baseName);
    } else if (format === 'parquet') {
        // Show a notification that this format is simulated
        showNotification("Parquet format download simulated - actual conversion would require a server component");
        downloadJson(baseName + "_parquet_simulated");
    }
}
|
| 1303 |
|
| 1304 |
+
// Serialize fullDatasetRows to CSV with Papa.unparse and trigger a browser
// download named `<filename>.csv`.
//
// filename: base name without extension.
function downloadCsv(filename) {
    const csvText = Papa.unparse(fullDatasetRows);

    // Expose the text through a temporary object URL
    const payload = new Blob([csvText], { type: 'text/csv' });
    const objectUrl = URL.createObjectURL(payload);

    // A detached link cannot be clicked reliably, so attach it first
    const link = document.createElement('a');
    link.href = objectUrl;
    link.download = `${filename}.csv`;
    document.body.appendChild(link);
    link.click();

    // Remove the link and release the URL shortly after the click
    const cleanUp = () => {
        document.body.removeChild(link);
        URL.revokeObjectURL(objectUrl);
    };
    setTimeout(cleanUp, 100);
}
|
| 1324 |
|
| 1325 |
+
// Serialize fullDatasetRows to pretty-printed JSON (2-space indent) and
// trigger a browser download named `<filename>.json`.
//
// filename: base name without extension.
function downloadJson(filename) {
    const jsonText = JSON.stringify(fullDatasetRows, null, 2);

    // Expose the text through a temporary object URL
    const payload = new Blob([jsonText], { type: 'application/json' });
    const objectUrl = URL.createObjectURL(payload);

    // A detached link cannot be clicked reliably, so attach it first
    const link = document.createElement('a');
    link.href = objectUrl;
    link.download = `${filename}.json`;
    document.body.appendChild(link);
    link.click();

    // Remove the link and release the URL shortly after the click
    const cleanUp = () => {
        document.body.removeChild(link);
        URL.revokeObjectURL(objectUrl);
    };
    setTimeout(cleanUp, 100);
}
|
| 1345 |
|
| 1346 |
+
// Return to the search view: un-hide the search page and hide the dataset
// detail page by toggling their 'hidden' class.
function showSearchPage() {
    searchPage.classList.toggle('hidden', false);
    datasetPage.classList.toggle('hidden', true);
}
|
| 1350 |
|
| 1351 |
+
// Report an error: log it to the console and show it to the user as an
// error-styled notification.
//
// message: text (or value) to report.
function showError(message) {
    const asError = true;

    console.error(message);
    showNotification(message, asError);
}
|
| 1355 |
|
| 1356 |
+
// Show a transient toast in the bottom-right corner of the page.
//
// message: text to display (assigned via textContent).
// isError: when true the toast is red, otherwise green.
//
// The toast starts fading after 3 seconds and is removed from the document
// 300 ms later, matching the CSS opacity transition duration.
function showNotification(message, isError = false) {
    const tone = isError ? 'bg-red-500 text-white' : 'bg-green-500 text-white';

    const toast = document.createElement('div');
    toast.className = [
        'fixed bottom-4 right-4 px-6 py-3 rounded-lg shadow-lg',
        tone,
        'z-50 transition-opacity duration-300'
    ].join(' ');
    toast.textContent = message;

    document.body.appendChild(toast);

    const removeToast = () => {
        document.body.removeChild(toast);
    };

    const fadeOut = () => {
        toast.style.opacity = '0';
        setTimeout(removeToast, 300);
    };

    setTimeout(fadeOut, 3000);
}
|
| 1374 |
|
| 1375 |
+
// Populate the result list with a fixed set of example datasets, alternating
// between entries attributed to a search engine and AI-generated ones, and
// reveal the "load more" container.
//
// Writes: currentDatasets (replaced by the examples).
function showPlaceholderDatasets() {
    // Entry attributed to a search engine
    const fromEngine = (name, tags, engine) => ({ name, tags, isReal: true, engine });
    // Entry that is purely AI-generated (carries no engine)
    const synthetic = (name, tags) => ({ name, tags, isReal: false });

    const samples = [
        fromEngine("NewsEventsPredict", ["classification", "media", "trend"], "AlltheInternet.com"),
        synthetic("FinancialForecast", ["economy", "stocks", "regression"]),
        fromEngine("HealthMonitor", ["science", "real-time", "anomaly detection"], "DuckDuckGo.com"),
        synthetic("SportsAnalysis", ["classification", "performance", "player tracking"]),
        fromEngine("RetailSalesAnalyzer", ["consumer behavior", "sales trend", "segmentation"], "Bing.com"),
        synthetic("SocialMediaSentiment", ["text classification", "opinion mining", "NLP"])
    ];

    currentDatasets = samples;
    displayDatasets(samples);
    loadMoreContainer.classList.remove('hidden');
}
|
| 1416 |
function searchWithRealData(query) {
|
| 1417 |
// Randomly select a search engine from the user's selected engines
|
| 1418 |
currentEngine = selectedEngines[Math.floor(Math.random() * selectedEngines.length)];
|
|
|
|
| 1525 |
}
|
| 1526 |
}
|
| 1527 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1528 |
function loadMoreDatasets() {
|
| 1529 |
currentPage++;
|
| 1530 |
|
|
|
|
| 1990 |
);
|
| 1991 |
} catch (err) {
|
| 1992 |
showError("Error sending message: " + err);
|
| 1993 |
+
// Fall back to AI data
|
| 1994 |
+
generateBatch(batchIndex + 1);
|
| 1995 |
}
|
| 1996 |
};
|
| 1997 |
|
|
|
|
| 2803 |
engine_settings_button = gr.Button("Configure Search Engines", icon="https://img.icons8.com/ios-filled/50/000000/settings--v1.png", size="sm")
|
| 2804 |
|
| 2805 |
# Engine Selection Modal
|
| 2806 |
+
# Hidden column acting as the search-engine settings "modal"; it is shown and
# hidden by toggling its visibility.
# `gr.Column` has no `id` keyword - the DOM id is passed as `elem_id`, as the
# dataset-page column in this file already does. Passing `id=` raises
# TypeError while the UI is being built.
# NOTE(review): the nesting below is reconstructed (the original indentation
# is not visible here) - confirm which widgets belong to which row.
with gr.Column(visible=False, elem_id="engine-modal-container") as engine_modal:
    with gr.Blocks():
        # Title row with the close button
        with gr.Row():
            gr.Markdown("### Search Engine Settings", elem_classes="text-xl font-bold")
            close_modal_btn = gr.Button("❌")
        gr.Markdown("Select which search engines to use for real data retrieval. A diverse selection improves results.")
        # Placeholder that receives the engine checkbox markup
        engine_options_html_comp = gr.HTML(elem_id="engine-options")
        with gr.Row():
            select_all_engines_btn = gr.Button("Select All")
            deselect_all_engines_btn = gr.Button("Deselect All")
        save_engines_btn = gr.Button("Save Settings", variant="primary")
|
| 2817 |
|
| 2818 |
# --- Dataset Detail Page UI ---
|
| 2819 |
with gr.Column(visible=False, elem_id="dataset-page") as dataset_page:
|
|
|
|
| 2860 |
if "I'm sorry" in line or "policy" in line: raise gr.Error("Inappropriate content detected.")
|
| 2861 |
if generated_count >= MAX_NB_ITEMS_PER_GENERATION_CALL: break
|
| 2862 |
|
| 2863 |
+
match = re.match(r"^\s*\d+\.\s+(.+?)\s+\((.+?)\)", line) # Parse line format
|
| 2864 |
if match:
|
| 2865 |
dataset_name, tags = match.groups()
|
| 2866 |
dataset_name, tags = dataset_name.strip(), tags.strip()
|
|
|
|
| 3125 |
outputs=[selected_engines_state, current_engine_state, gr.Info()]
|
| 3126 |
)
|
| 3127 |
|
| 3128 |
+
def _show_engine_modal():
    """Return a column update that makes the engine-settings modal visible."""
    return gr.Column(visible=True)


def _hide_engine_modal():
    """Return a column update that hides the engine-settings modal."""
    return gr.Column(visible=False)


# The settings button opens the modal; the close button hides it again.
engine_settings_button.click(_show_engine_modal, outputs=[engine_modal])
close_modal_btn.click(_hide_engine_modal, outputs=[engine_modal])
|
| 3130 |
+
|
| 3131 |
# Initial App Load Logic
|
| 3132 |
@demo.load(outputs=([search_page, dataset_page, dataset_title_md, dataset_description_md, dataset_source_badge, dataset_source_info, dataset_share_textbox, full_dataset_section, save_dataset_button, open_dataset_message, search_bar] + # Outputs for detail page and search bar
|
| 3133 |
buttons + [generated_texts_state] + # Outputs for search results buttons and state
|
|
|
|
| 3190 |
|
| 3191 |
|
| 3192 |
if __name__ == "__main__":
    # Listen on all interfaces and do not create a public share link.
    launch_options = {"share": False, "server_name": "0.0.0.0"}
    demo.launch(**launch_options)
|