| | <!DOCTYPE html> |
| | <html lang="en"> |
| | <head> |
| | <meta charset="UTF-8"> |
| | <meta name="viewport" content="width=device-width, initial-scale=1.0"> |
| | <title>LLMScraper | AI Data Collection Platform</title> |
| | <script src="https://cdn.tailwindcss.com"></script> |
| | <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.0/css/all.min.css"> |
| | <script> |
| | tailwind.config = { |
| | theme: { |
| | extend: { |
| | colors: { |
| | primary: '#4F46E5', |
| | secondary: '#10B981', |
| | dark: '#1F2937', |
| | light: '#F9FAFB' |
| | } |
| | } |
| | } |
| | } |
| | </script> |
| | <style> |
| | /* Inter web font. @import must stay ahead of every other rule in this sheet. */ |
| | @import url('https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600;700&display=swap'); |
| | |
| | /* Page shell: Inter typeface over a pale blue diagonal gradient at least one viewport tall. */ |
| | body { |
| |   font-family: 'Inter', sans-serif; |
| |   background: linear-gradient(135deg, #f0f9ff 0%, #e6f7ff 100%); |
| |   min-height: 100vh; |
| | } |
| | |
| | /* Cards: soft resting shadow that lifts and deepens on hover. */ |
| | .card-hover { |
| |   transition: all 0.3s ease; |
| |   box-shadow: 0 4px 6px rgba(0, 0, 0, 0.05); |
| | } |
| | |
| | .card-hover:hover { |
| |   transform: translateY(-5px); |
| |   box-shadow: 0 10px 15px rgba(0, 0, 0, 0.1); |
| | } |
| | |
| | /* Indigo-to-violet brand gradient shared by the header, hero and call-to-action bands. */ |
| | .gradient-bg { |
| |   background: linear-gradient(135deg, #4F46E5 0%, #7C3AED 100%); |
| | } |
| | |
| | /* Progress meter: .progress-bar is the track, .progress-fill the coloured portion. */ |
| | .progress-bar { |
| |   height: 8px; |
| |   border-radius: 4px; |
| |   overflow: hidden; |
| | } |
| | |
| | .progress-fill { |
| |   height: 100%; |
| |   border-radius: 4px; |
| |   background: linear-gradient(90deg, #4F46E5 0%, #10B981 100%); |
| |   /* Default fill; rows that are not at 75% override this with an inline width. */ |
| |   width: 75%; |
| | } |
| | |
| | /* Status table: tinted header cells, zebra striping, and a hover highlight. */ |
| | .data-table { |
| |   border-collapse: separate; |
| |   border-spacing: 0; |
| | } |
| | |
| | .data-table th { |
| |   background-color: #f3f4f6; |
| | } |
| | |
| | .data-table tr:nth-child(even) { |
| |   background-color: #f9fafb; |
| | } |
| | |
| | .data-table tr:hover { |
| |   background-color: #f0f9ff; |
| | } |
| | |
| | /* Static indigo halo. */ |
| | .glow { |
| |   box-shadow: 0 0 15px rgba(79, 70, 229, 0.3); |
| | } |
| | |
| | /* Endless expanding-ring animation; it animates box-shadow, so while it runs it |
| |    takes precedence over any other box-shadow set on the same element. */ |
| | .pulse { |
| |   animation: pulse 2s infinite; |
| | } |
| | |
| | @keyframes pulse { |
| |   0% { box-shadow: 0 0 0 0 rgba(79, 70, 229, 0.4); } |
| |   70% { box-shadow: 0 0 0 10px rgba(79, 70, 229, 0); } |
| |   100% { box-shadow: 0 0 0 0 rgba(79, 70, 229, 0); } |
| | } |
| | |
| | /* Status pills: .tag sets the shape, the modifier classes set the colour pair. */ |
| | .tag { |
| |   display: inline-block; |
| |   padding: 2px 8px; |
| |   border-radius: 12px; |
| |   font-size: 0.75rem; |
| |   font-weight: 500; |
| | } |
| | |
| | .tag-processing { |
| |   background-color: #fef3c7; |
| |   color: #92400e; |
| | } |
| | |
| | .tag-cleaned { |
| |   background-color: #d1fae5; |
| |   color: #065f46; |
| | } |
| | |
| | .tag-labeled { |
| |   background-color: #dbeafe; |
| |   color: #1e40af; |
| | } |
| | |
| | /* Honour the operating system's "reduce motion" preference: the pulse ring and the |
| |    card lift are decorative, so switch them off rather than slow them down. */ |
| | @media (prefers-reduced-motion: reduce) { |
| |   .pulse { |
| |     animation: none; |
| |   } |
| | |
| |   .card-hover { |
| |     transition: none; |
| |   } |
| | |
| |   .card-hover:hover { |
| |     transform: none; |
| |   } |
| | } |
| | </style> |
| | </head> |
| | <body class="text-gray-800"> |
| | |
| | <!-- Site header: brand mark, primary navigation (shown from the md breakpoint up), and account actions. --> |
| | <header class="gradient-bg text-white"> |
| |   <div class="container mx-auto px-4 py-6"> |
| |     <div class="flex justify-between items-center"> |
| |       <div class="flex items-center space-x-2"> |
| |         <i class="fas fa-robot text-3xl" aria-hidden="true"></i> |
| |         <!-- Brand name is plain text, not a heading: the hero title is the page's h1. --> |
| |         <span class="text-2xl font-bold">LLMScraper</span> |
| |       </div> |
| |       <nav class="hidden md:block" aria-label="Primary"> |
| |         <!-- The href="#" values are placeholders; no destinations are defined in this file. --> |
| |         <ul class="flex space-x-8"> |
| |           <li><a href="#" class="hover:text-indigo-200 transition">Dashboard</a></li> |
| |           <li><a href="#" class="hover:text-indigo-200 transition">Scrape Jobs</a></li> |
| |           <li><a href="#" class="hover:text-indigo-200 transition">Data Library</a></li> |
| |           <li><a href="#" class="hover:text-indigo-200 transition">Labeling</a></li> |
| |           <li><a href="#" class="hover:text-indigo-200 transition">API</a></li> |
| |         </ul> |
| |       </nav> |
| |       <div class="flex items-center space-x-4"> |
| |         <button class="bg-white text-primary px-4 py-2 rounded-lg font-medium hover:bg-indigo-50 transition" type="button">Sign In</button> |
| |         <button class="hidden md:block bg-secondary text-white px-4 py-2 rounded-lg font-medium hover:bg-emerald-500 transition" type="button">Get Started</button> |
| |         <!-- Icon-only control, so aria-label supplies its accessible name. No click handler is bound to it in this page's script. --> |
| |         <button class="md:hidden text-2xl" type="button" aria-label="Menu"><i class="fas fa-bars" aria-hidden="true"></i></button> |
| |       </div> |
| |     </div> |
| |   </div> |
| | </header> |
| |
|
| | |
| | <!-- Hero: page title, one-line value proposition, and the two primary calls to action. --> |
| | <section class="gradient-bg text-white py-16"> |
| |   <div class="container mx-auto px-4 text-center"> |
| |     <h1 class="text-4xl md:text-5xl font-bold mb-6">AI-Powered Web Scraping &amp; Data Labeling</h1> |
| |     <p class="text-xl max-w-3xl mx-auto mb-10">Collect, clean, and label web data at scale using advanced language models to fine-tune your AI systems</p> |
| |     <!-- Explicit type="button" keeps these from acting as submit buttons if they are ever placed inside a form. --> |
| |     <div class="flex flex-col sm:flex-row justify-center gap-4"> |
| |       <button class="bg-white text-primary px-8 py-3 rounded-lg font-bold text-lg hover:bg-indigo-50 transition" type="button">Start Free Trial</button> |
| |       <button class="bg-transparent border-2 border-white px-8 py-3 rounded-lg font-bold text-lg hover:bg-white hover:text-primary transition" type="button">View Demo</button> |
| |     </div> |
| |   </div> |
| | </section> |
| |
|
| | |
| | <!-- Features: three cards (scraping, cleaning, labeling), each with an icon, a summary and a checklist. |
| |      All icons here are decorative, so they carry aria-hidden to keep screen readers from announcing them. --> |
| | <section class="py-16 bg-light"> |
| |   <div class="container mx-auto px-4"> |
| |     <h2 class="text-3xl font-bold text-center mb-4">Powerful Data Collection Workflow</h2> |
| |     <p class="text-gray-600 text-center max-w-2xl mx-auto mb-12">Our AI-powered platform handles the entire data pipeline from collection to labeling</p> |
| | |
| |     <div class="grid grid-cols-1 md:grid-cols-3 gap-8"> |
| |       <!-- Card 1: scraping --> |
| |       <div class="bg-white rounded-xl p-6 card-hover"> |
| |         <div class="w-16 h-16 rounded-full bg-indigo-100 flex items-center justify-center mb-6"> |
| |           <i class="fas fa-spider text-2xl text-primary" aria-hidden="true"></i> |
| |         </div> |
| |         <h3 class="text-xl font-bold mb-3">Intelligent Web Scraping</h3> |
| |         <p class="text-gray-600 mb-4">Extract structured data from any website using natural language instructions. No complex selectors needed.</p> |
| |         <ul class="space-y-2"> |
| |           <li class="flex items-start"> |
| |             <i class="fas fa-check-circle text-secondary mt-1 mr-2" aria-hidden="true"></i> |
| |             <span>Handles JavaScript-rendered content</span> |
| |           </li> |
| |           <li class="flex items-start"> |
| |             <i class="fas fa-check-circle text-secondary mt-1 mr-2" aria-hidden="true"></i> |
| |             <span>Automatic pagination &amp; navigation</span> |
| |           </li> |
| |           <li class="flex items-start"> |
| |             <i class="fas fa-check-circle text-secondary mt-1 mr-2" aria-hidden="true"></i> |
| |             <span>Anti-bot detection bypass</span> |
| |           </li> |
| |         </ul> |
| |       </div> |
| | |
| |       <!-- Card 2: cleaning --> |
| |       <div class="bg-white rounded-xl p-6 card-hover"> |
| |         <div class="w-16 h-16 rounded-full bg-emerald-100 flex items-center justify-center mb-6"> |
| |           <i class="fas fa-broom text-2xl text-secondary" aria-hidden="true"></i> |
| |         </div> |
| |         <h3 class="text-xl font-bold mb-3">AI Data Cleaning</h3> |
| |         <p class="text-gray-600 mb-4">Automatically clean and normalize scraped data using language understanding to fix inconsistencies.</p> |
| |         <ul class="space-y-2"> |
| |           <li class="flex items-start"> |
| |             <i class="fas fa-check-circle text-secondary mt-1 mr-2" aria-hidden="true"></i> |
| |             <span>Entity recognition &amp; normalization</span> |
| |           </li> |
| |           <li class="flex items-start"> |
| |             <i class="fas fa-check-circle text-secondary mt-1 mr-2" aria-hidden="true"></i> |
| |             <span>Duplicate detection &amp; removal</span> |
| |           </li> |
| |           <li class="flex items-start"> |
| |             <i class="fas fa-check-circle text-secondary mt-1 mr-2" aria-hidden="true"></i> |
| |             <span>Context-aware error correction</span> |
| |           </li> |
| |         </ul> |
| |       </div> |
| | |
| |       <!-- Card 3: labeling --> |
| |       <div class="bg-white rounded-xl p-6 card-hover"> |
| |         <div class="w-16 h-16 rounded-full bg-blue-100 flex items-center justify-center mb-6"> |
| |           <i class="fas fa-tags text-2xl text-blue-500" aria-hidden="true"></i> |
| |         </div> |
| |         <h3 class="text-xl font-bold mb-3">Automated Labeling</h3> |
| |         <p class="text-gray-600 mb-4">Generate high-quality labels for your datasets using large language models with human-in-the-loop validation.</p> |
| |         <ul class="space-y-2"> |
| |           <li class="flex items-start"> |
| |             <i class="fas fa-check-circle text-secondary mt-1 mr-2" aria-hidden="true"></i> |
| |             <span>Zero-shot &amp; few-shot classification</span> |
| |           </li> |
| |           <li class="flex items-start"> |
| |             <i class="fas fa-check-circle text-secondary mt-1 mr-2" aria-hidden="true"></i> |
| |             <span>Semantic similarity clustering</span> |
| |           </li> |
| |           <li class="flex items-start"> |
| |             <i class="fas fa-check-circle text-secondary mt-1 mr-2" aria-hidden="true"></i> |
| |             <span>Active learning for model improvement</span> |
| |           </li> |
| |         </ul> |
| |       </div> |
| |     </div> |
| |   </div> |
| | </section> |
| |
|
| | |
| | <!-- Demo: URL + schema controls, a job-status table, and before/after JSON samples. --> |
| | <section class="py-16 bg-white"> |
| |   <div class="container mx-auto px-4"> |
| |     <h2 class="text-3xl font-bold text-center mb-4">See It In Action</h2> |
| |     <p class="text-gray-600 text-center max-w-2xl mx-auto mb-12">Our platform makes it simple to collect and prepare training data</p> |
| | |
| |     <div class="bg-gray-50 rounded-xl p-6 mb-8"> |
| |       <div class="flex flex-wrap gap-4 mb-6"> |
| |         <div class="flex-1 min-w-[300px]"> |
| |           <!-- for/id pairs tie each label to its control so clicking the label focuses it and screen readers announce it. --> |
| |           <label class="block text-sm font-medium mb-2" for="scrape-url">Enter Website URL</label> |
| |           <div class="flex"> |
| |             <!-- type stays "text": the page script locates this field with input[type="text"]. --> |
| |             <input class="flex-1 border border-gray-300 rounded-l-lg px-4 py-2 focus:outline-none focus:ring-2 focus:ring-primary" id="scrape-url" type="text" placeholder="https://example.com/products"> |
| |             <button class="bg-primary text-white px-6 py-2 rounded-r-lg font-medium" type="button">Scrape</button> |
| |           </div> |
| |         </div> |
| |         <div class="flex-1 min-w-[300px]"> |
| |           <label class="block text-sm font-medium mb-2" for="data-schema">Data Schema</label> |
| |           <select class="w-full border border-gray-300 rounded-lg px-4 py-2 focus:outline-none focus:ring-2 focus:ring-primary" id="data-schema"> |
| |             <option>E-commerce Product Data</option> |
| |             <option>News Articles</option> |
| |             <option>Job Listings</option> |
| |             <option>Real Estate Listings</option> |
| |             <option>Custom Schema</option> |
| |           </select> |
| |         </div> |
| |       </div> |
| | |
| |       <div class="bg-white rounded-lg border border-gray-200 overflow-hidden"> |
| |         <div class="overflow-x-auto"> |
| |           <table class="w-full data-table"> |
| |             <!-- Visually hidden caption: names the table for assistive technology without changing the layout. --> |
| |             <caption class="sr-only">Scrape job status</caption> |
| |             <thead> |
| |               <tr> |
| |                 <th class="text-left py-3 px-4 font-semibold text-sm border-b" scope="col">Status</th> |
| |                 <th class="text-left py-3 px-4 font-semibold text-sm border-b" scope="col">Page URL</th> |
| |                 <th class="text-left py-3 px-4 font-semibold text-sm border-b" scope="col">Items Found</th> |
| |                 <th class="text-left py-3 px-4 font-semibold text-sm border-b" scope="col">Progress</th> |
| |               </tr> |
| |             </thead> |
| |             <tbody> |
| |               <tr> |
| |                 <td class="py-3 px-4 border-b"> |
| |                   <span class="tag tag-processing">Processing</span> |
| |                 </td> |
| |                 <td class="py-3 px-4 border-b">https://example.com/products</td> |
| |                 <td class="py-3 px-4 border-b">24</td> |
| |                 <td class="py-3 px-4 border-b"> |
| |                   <div class="progress-bar bg-gray-200 w-full"> |
| |                     <!-- No inline width: this row uses the stylesheet's default 75% fill. --> |
| |                     <div class="progress-fill"></div> |
| |                   </div> |
| |                   <div class="text-xs text-gray-500 mt-1">75% complete</div> |
| |                 </td> |
| |               </tr> |
| |               <tr> |
| |                 <td class="py-3 px-4 border-b"> |
| |                   <span class="tag tag-cleaned">Cleaned</span> |
| |                 </td> |
| |                 <td class="py-3 px-4 border-b">https://example.com/specials</td> |
| |                 <td class="py-3 px-4 border-b">18</td> |
| |                 <td class="py-3 px-4 border-b"> |
| |                   <div class="progress-bar bg-gray-200 w-full"> |
| |                     <div class="progress-fill" style="width: 100%"></div> |
| |                   </div> |
| |                   <div class="text-xs text-gray-500 mt-1">100% complete</div> |
| |                 </td> |
| |               </tr> |
| |               <tr> |
| |                 <td class="py-3 px-4"> |
| |                   <span class="tag tag-labeled">Labeled</span> |
| |                 </td> |
| |                 <td class="py-3 px-4">https://example.com/new-arrivals</td> |
| |                 <td class="py-3 px-4">32</td> |
| |                 <td class="py-3 px-4"> |
| |                   <div class="progress-bar bg-gray-200 w-full"> |
| |                     <div class="progress-fill" style="width: 100%"></div> |
| |                   </div> |
| |                   <div class="text-xs text-gray-500 mt-1">100% complete</div> |
| |                 </td> |
| |               </tr> |
| |             </tbody> |
| |           </table> |
| |         </div> |
| |       </div> |
| |     </div> |
| | |
| |     <!-- Sample payloads. Whitespace inside <pre> is rendered as-is, so those lines are left untouched. --> |
| |     <div class="grid grid-cols-1 md:grid-cols-2 gap-8"> |
| |       <div class="bg-indigo-50 rounded-xl p-6"> |
| |         <h3 class="text-xl font-bold mb-4">Raw Scraped Data</h3> |
| |         <div class="bg-white rounded-lg p-4 font-mono text-sm overflow-x-auto max-h-60"> |
| | <pre>{ |
| | "products": [ |
| | { |
| | "title": "Premium Headphones - Wireless", |
| | "price": "$199.99", |
| | "description": "Experience crystal-clear audio with our premium wireless headphones...", |
| | "rating": "4.5 out of 5 stars", |
| | "availability": "In Stock" |
| | }, |
| | ... |
| | ] |
| | }</pre> |
| |         </div> |
| |       </div> |
| | |
| |       <div class="bg-emerald-50 rounded-xl p-6"> |
| |         <h3 class="text-xl font-bold mb-4">Cleaned &amp; Labeled Data</h3> |
| |         <div class="bg-white rounded-lg p-4 font-mono text-sm overflow-x-auto max-h-60"> |
| | <pre>{ |
| | "products": [ |
| | { |
| | "title": "Premium Headphones Wireless", |
| | "price": 199.99, |
| | "currency": "USD", |
| | "description": "Experience crystal-clear audio with premium wireless headphones...", |
| | "rating": 4.5, |
| | "max_rating": 5, |
| | "availability": true, |
| | "category": "Electronics > Audio > Headphones", |
| | "features": ["wireless", "noise-cancelling", "bluetooth"] |
| | }, |
| | ... |
| | ] |
| | }</pre> |
| |         </div> |
| |       </div> |
| |     </div> |
| |   </div> |
| | </section> |
| |
|
| | |
| | <!-- Closing call to action: repeats the sign-up prompt on the brand gradient. --> |
| | <section class="py-16 gradient-bg text-white"> |
| |   <div class="container mx-auto px-4 text-center"> |
| |     <h2 class="text-3xl font-bold mb-6">Ready to Enhance Your AI Models?</h2> |
| |     <p class="text-xl max-w-2xl mx-auto mb-10">Start collecting high-quality training data today with our AI-powered platform</p> |
| |     <!-- Explicit type="button" keeps these from acting as submit buttons if they are ever placed inside a form. --> |
| |     <div class="flex flex-col sm:flex-row justify-center gap-4"> |
| |       <button class="bg-white text-primary px-8 py-3 rounded-lg font-bold text-lg hover:bg-indigo-50 transition" type="button">Get Started Free</button> |
| |       <button class="bg-transparent border-2 border-white px-8 py-3 rounded-lg font-bold text-lg hover:bg-white hover:text-primary transition" type="button">Schedule a Demo</button> |
| |     </div> |
| |   </div> |
| | </section> |
| |
|
| | |
| | <!-- Site footer: brand blurb with social links, three link columns, and the copyright line. |
| |      Every href="#" is a placeholder; no destinations are defined in this file. --> |
| | <footer class="bg-dark text-gray-300 py-12"> |
| |   <div class="container mx-auto px-4"> |
| |     <div class="grid grid-cols-1 md:grid-cols-4 gap-8"> |
| |       <div> |
| |         <div class="flex items-center space-x-2 mb-4"> |
| |           <i class="fas fa-robot text-2xl text-primary" aria-hidden="true"></i> |
| |           <h3 class="text-xl font-bold text-white">LLMScraper</h3> |
| |         </div> |
| |         <p class="mb-4">AI-powered web scraping and data labeling for machine learning teams.</p> |
| |         <!-- Icon-only links: aria-label gives each one an accessible name; the glyph itself is hidden from assistive tech. --> |
| |         <div class="flex space-x-4"> |
| |           <a href="#" class="text-gray-400 hover:text-white" aria-label="Twitter"><i class="fab fa-twitter" aria-hidden="true"></i></a> |
| |           <a href="#" class="text-gray-400 hover:text-white" aria-label="LinkedIn"><i class="fab fa-linkedin" aria-hidden="true"></i></a> |
| |           <a href="#" class="text-gray-400 hover:text-white" aria-label="GitHub"><i class="fab fa-github" aria-hidden="true"></i></a> |
| |         </div> |
| |       </div> |
| | |
| |       <div> |
| |         <h4 class="text-lg font-semibold text-white mb-4">Product</h4> |
| |         <ul class="space-y-2"> |
| |           <li><a href="#" class="hover:text-white transition">Features</a></li> |
| |           <li><a href="#" class="hover:text-white transition">Pricing</a></li> |
| |           <li><a href="#" class="hover:text-white transition">Integrations</a></li> |
| |           <li><a href="#" class="hover:text-white transition">Roadmap</a></li> |
| |         </ul> |
| |       </div> |
| | |
| |       <div> |
| |         <h4 class="text-lg font-semibold text-white mb-4">Resources</h4> |
| |         <ul class="space-y-2"> |
| |           <li><a href="#" class="hover:text-white transition">Documentation</a></li> |
| |           <li><a href="#" class="hover:text-white transition">API Reference</a></li> |
| |           <li><a href="#" class="hover:text-white transition">Tutorials</a></li> |
| |           <li><a href="#" class="hover:text-white transition">Blog</a></li> |
| |         </ul> |
| |       </div> |
| | |
| |       <div> |
| |         <h4 class="text-lg font-semibold text-white mb-4">Legal</h4> |
| |         <ul class="space-y-2"> |
| |           <li><a href="#" class="hover:text-white transition">Privacy Policy</a></li> |
| |           <li><a href="#" class="hover:text-white transition">Terms of Service</a></li> |
| |           <li><a href="#" class="hover:text-white transition">Compliance</a></li> |
| |           <li><a href="#" class="hover:text-white transition">Scraping Ethics</a></li> |
| |         </ul> |
| |       </div> |
| |     </div> |
| | |
| |     <div class="border-t border-gray-700 mt-12 pt-8 text-center"> |
| |       <p>© 2023 LLMScraper. All rights reserved.</p> |
| |     </div> |
| |   </div> |
| | </footer> |
| |
|
| | |
| | <!-- Floating action button pinned to the bottom-right corner of the viewport. |
| |      It is icon-only, so aria-label supplies the accessible name. |
| |      NOTE(review): no handler is bound to it in this page's script; "Add" mirrors the plus icon. Confirm the intended action and refine the label. --> |
| | <div class="fixed bottom-6 right-6"> |
| |   <button class="w-14 h-14 rounded-full bg-primary text-white flex items-center justify-center shadow-lg glow pulse" type="button" aria-label="Add"> |
| |     <i class="fas fa-plus text-xl" aria-hidden="true"></i> |
| |   </button> |
| | </div> |
| |
|
| | <script> |
| | |
| | document.addEventListener('DOMContentLoaded', function() { |
| | |
| | const scrapeBtn = document.querySelector('button:contains("Scrape")'); |
| | if(scrapeBtn) { |
| | scrapeBtn.addEventListener('click', function() { |
| | const urlInput = document.querySelector('input[type="text"]'); |
| | if(urlInput.value.trim() === '') { |
| | alert('Please enter a valid URL'); |
| | return; |
| | } |
| | |
| | |
| | const originalText = scrapeBtn.textContent; |
| | scrapeBtn.innerHTML = '<i class="fas fa-spinner fa-spin mr-2"></i> Scraping...'; |
| | scrapeBtn.disabled = true; |
| | |
| | |
| | setTimeout(function() { |
| | scrapeBtn.innerHTML = originalText; |
| | scrapeBtn.disabled = false; |
| | |
| | |
| | alert(`Successfully started scraping: ${urlInput.value}`); |
| | |
| | |
| | const tableBody = document.querySelector('.data-table tbody'); |
| | const newRow = document.createElement('tr'); |
| | newRow.innerHTML = ` |
| | <td class="py-3 px-4 border-b"> |
| | <span class="tag tag-processing">Processing</span> |
| | </td> |
| | <td class="py-3 px-4 border-b">${urlInput.value}</td> |
| | <td class="py-3 px-4 border-b">0</td> |
| | <td class="py-3 px-4 border-b"> |
| | <div class="progress-bar bg-gray-200 w-full"> |
| | <div class="progress-fill" style="width: 5%"></div> |
| | </div> |
| | <div class="text-xs text-gray-500 mt-1">5% complete</div> |
| | </td> |
| | `; |
| | tableBody.insertBefore(newRow, tableBody.firstChild); |
| | |
| | |
| | simulateProgress(newRow); |
| | }, 2000); |
| | }); |
| | } |
| | |
| | function simulateProgress(row) { |
| | let progress = 5; |
| | const interval = setInterval(() => { |
| | progress += 5; |
| | if(progress > 100) progress = 100; |
| | |
| | const progressBar = row.querySelector('.progress-fill'); |
| | const progressText = row.querySelector('.text-xs'); |
| | |
| | if(progressBar && progressText) { |
| | progressBar.style.width = `${progress}%`; |
| | progressText.textContent = `${progress}% complete`; |
| | |
| | if(progress === 100) { |
| | clearInterval(interval); |
| | const statusTag = row.querySelector('.tag'); |
| | if(statusTag) { |
| | statusTag.textContent = "Cleaned"; |
| | statusTag.className = "tag tag-cleaned"; |
| | } |
| | } |
| | } |
| | }, 500); |
| | } |
| | }); |
| | </script> |
| | <!-- "Made with DeepSite" attribution badge, fixed to the bottom-left corner. rel="noopener" stops the new tabs from reaching this page through window.opener. --> |
| | <p style="border-radius: 8px; text-align: center; font-size: 12px; color: #fff; margin-top: 16px;position: fixed; left: 8px; bottom: 8px; z-index: 10; background: rgba(0, 0, 0, 0.8); padding: 4px 8px;">Made with <img src="https://enzostvs-deepsite.hf.space/logo.svg" alt="DeepSite Logo" style="width: 16px; height: 16px; vertical-align: middle;display:inline-block;margin-right:3px;filter:brightness(0) invert(1);"><a href="https://enzostvs-deepsite.hf.space" style="color: #fff;text-decoration: underline;" target="_blank" rel="noopener">DeepSite</a> - 🧬 <a href="https://enzostvs-deepsite.hf.space?remix=MagicMeWizard/ai-generate-website-ui" style="color: #fff;text-decoration: underline;" target="_blank" rel="noopener">Remix</a></p></body> |
| | </html> |