Spaces:
Running
Running
| <html lang="en"> | |
| <head> | |
| <meta charset="UTF-8"> | |
| <meta name="viewport" content="width=device-width, initial-scale=1.0"> | |
| <title>The Digital Collector - Herholdts.co.za Scraper</title> | |
| <script src="https://cdn.tailwindcss.com"></script> | |
| <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.0/css/all.min.css"> | |
| <style> | |
| /* Diagonal indigo-to-emerald gradient for elements that opt in via .gradient-bg. */ | |
| .gradient-bg { | |
| background: linear-gradient(135deg, #4338ca 0%, #10b981 100%); | |
| } | |
| /* Crawl-level indicators scale up slightly on hover; the transition eases the change. */ | |
| .crawl-level-indicator { | |
| transition: all 0.3s ease; | |
| } | |
| .crawl-level-indicator:hover { | |
| transform: scale(1.05); | |
| } | |
| /* Progress ring: rotated a quarter turn about its own centre, with changes to | |
| stroke-dashoffset animated over 0.35s. */ | |
| .progress-ring__circle { | |
| transition: stroke-dashoffset 0.35s; | |
| transform: rotate(-90deg); | |
| transform-origin: 50% 50%; | |
| } | |
| /* Module cards lift by 5px and gain a deeper shadow on hover. */ | |
| .module-card { | |
| transition: all 0.3s ease; | |
| } | |
| .module-card:hover { | |
| transform: translateY(-5px); | |
| box-shadow: 0 20px 25px -5px rgba(0, 0, 0, 0.1), 0 10px 10px -5px rgba(0, 0, 0, 0.04); | |
| } | |
| /* Tab panels stay hidden until they carry .active. */ | |
| .tab-content { | |
| display: none; | |
| opacity: 0; | |
| transform: translateY(10px); | |
| } | |
| /* The entrance effect is a keyframe animation rather than a transition: a | |
| transition never starts when the same style change flips display from none to | |
| block, because the hidden element has no rendered starting style. An | |
| animation does start as soon as the panel becomes displayed. */ | |
| .tab-content.active { | |
| display: block; | |
| opacity: 1; | |
| transform: translateY(0); | |
| animation: tab-content-enter 0.3s ease; | |
| } | |
| @keyframes tab-content-enter { | |
| from { opacity: 0; transform: translateY(10px); } | |
| to { opacity: 1; transform: translateY(0); } | |
| } | |
| /* Skip the entrance motion for users who ask for reduced motion. */ | |
| @media (prefers-reduced-motion: reduce) { | |
| .tab-content.active { | |
| animation: none; | |
| } | |
| } | |
| /* Slim scrollbar for WebKit/Blink engines on elements with .custom-scrollbar. */ | |
| .custom-scrollbar::-webkit-scrollbar { | |
| width: 6px; | |
| height: 6px; | |
| } | |
| .custom-scrollbar::-webkit-scrollbar-track { | |
| background: #f1f1f1; | |
| } | |
| .custom-scrollbar::-webkit-scrollbar-thumb { | |
| background: #888; | |
| border-radius: 3px; | |
| } | |
| .custom-scrollbar::-webkit-scrollbar-thumb:hover { | |
| background: #555; | |
| } | |
| /* Continuous one-second rotation for loading indicators. */ | |
| .loading-spinner { | |
| animation: spin 1s linear infinite; | |
| } | |
| @keyframes spin { | |
| 0% { transform: rotate(0deg); } | |
| 100% { transform: rotate(360deg); } | |
| } | |
| /* Row tints with a coloured left rule for failed / successful crawl entries. */ | |
| .crawl-error { | |
| background-color: #fff5f5; | |
| border-left: 4px solid #f56565; | |
| } | |
| .crawl-success { | |
| background-color: #f0fff4; | |
| border-left: 4px solid #48bb78; | |
| } | |
| </style> | |
| </head> | |
| <body class="bg-gray-50 min-h-screen"> | |
| <!-- Navigation --> | |
| <!-- Primary navigation: brand, desktop tab links, account button, and the collapsible mobile menu. | |
| Each .nav-tab names its panel in data-tab; the panels visible in this file use ids of the | |
| form "{data-tab}-content" (dashboard-content, new-crawl-content). The script that switches | |
| tabs and toggles #mobile-menu is not part of this markup. | |
| Icon glyphs are decorative and carry aria-hidden so assistive technology skips them. --> | |
| <nav class="bg-white shadow-sm" aria-label="Primary"> | |
| <div class="max-w-7xl mx-auto px-4 sm:px-6 lg:px-8"> | |
| <div class="flex justify-between h-16"> | |
| <div class="flex items-center"> | |
| <div class="flex-shrink-0 flex items-center"> | |
| <i class="fas fa-spider text-blue-600 text-2xl mr-2" aria-hidden="true"></i> | |
| <span class="text-xl font-bold text-gray-900">The Digital Collector</span> | |
| </div> | |
| <div class="hidden sm:ml-6 sm:flex sm:space-x-8"> | |
| <a href="#" class="border-blue-500 text-gray-900 inline-flex items-center px-1 pt-1 border-b-2 text-sm font-medium nav-tab active" data-tab="dashboard"> | |
| Dashboard | |
| </a> | |
| <a href="#" class="border-transparent text-gray-500 hover:border-gray-300 hover:text-gray-700 inline-flex items-center px-1 pt-1 border-b-2 text-sm font-medium nav-tab" data-tab="new-crawl"> | |
| New Crawl | |
| </a> | |
| <a href="#" class="border-transparent text-gray-500 hover:border-gray-300 hover:text-gray-700 inline-flex items-center px-1 pt-1 border-b-2 text-sm font-medium nav-tab" data-tab="results"> | |
| Results | |
| </a> | |
| <a href="#" class="border-transparent text-gray-500 hover:border-gray-300 hover:text-gray-700 inline-flex items-center px-1 pt-1 border-b-2 text-sm font-medium nav-tab" data-tab="modules"> | |
| Modules | |
| </a> | |
| </div> | |
| </div> | |
| <div class="hidden sm:ml-6 sm:flex sm:items-center"> | |
| <button type="button" class="bg-blue-600 hover:bg-blue-700 text-white px-4 py-2 rounded-md text-sm font-medium flex items-center"> | |
| <i class="fas fa-user-circle mr-2" aria-hidden="true"></i> Account | |
| </button> | |
| </div> | |
| <div class="-mr-2 flex items-center sm:hidden"> | |
| <!-- The sr-only span is the button's accessible name; the bars glyph is visual only. --> | |
| <button type="button" class="inline-flex items-center justify-center p-2 rounded-md text-gray-400 hover:text-gray-500 hover:bg-gray-100 focus:outline-none focus:ring-2 focus:ring-inset focus:ring-blue-500" aria-controls="mobile-menu" aria-expanded="false" id="mobile-menu-button"> | |
| <span class="sr-only">Open main menu</span> | |
| <i class="fas fa-bars" aria-hidden="true"></i> | |
| </button> | |
| </div> | |
| </div> | |
| </div> | |
| <!-- Mobile menu --> | |
| <div class="sm:hidden hidden" id="mobile-menu"> | |
| <div class="pt-2 pb-3 space-y-1"> | |
| <a href="#" class="bg-blue-50 border-blue-500 text-blue-700 block pl-3 pr-4 py-2 border-l-4 text-base font-medium nav-tab active" data-tab="dashboard">Dashboard</a> | |
| <a href="#" class="border-transparent text-gray-500 hover:bg-gray-50 hover:border-gray-300 hover:text-gray-700 block pl-3 pr-4 py-2 border-l-4 text-base font-medium nav-tab" data-tab="new-crawl">New Crawl</a> | |
| <a href="#" class="border-transparent text-gray-500 hover:bg-gray-50 hover:border-gray-300 hover:text-gray-700 block pl-3 pr-4 py-2 border-l-4 text-base font-medium nav-tab" data-tab="results">Results</a> | |
| <a href="#" class="border-transparent text-gray-500 hover:bg-gray-50 hover:border-gray-300 hover:text-gray-700 block pl-3 pr-4 py-2 border-l-4 text-base font-medium nav-tab" data-tab="modules">Modules</a> | |
| </div> | |
| <div class="pt-4 pb-3 border-t border-gray-200"> | |
| <div class="flex items-center px-4"> | |
| <div class="flex-shrink-0"> | |
| <i class="fas fa-user-circle text-gray-400 text-2xl" aria-hidden="true"></i> | |
| </div> | |
| <div class="ml-3"> | |
| <div class="text-base font-medium text-gray-800">User Account</div> | |
| <div class="text-sm font-medium text-gray-500">user@example.com</div> | |
| </div> | |
| </div> | |
| <div class="mt-3 space-y-1"> | |
| <a href="#" class="block px-4 py-2 text-base font-medium text-gray-500 hover:text-gray-800 hover:bg-gray-100">Your Profile</a> | |
| <a href="#" class="block px-4 py-2 text-base font-medium text-gray-500 hover:text-gray-800 hover:bg-gray-100">Settings</a> | |
| <a href="#" class="block px-4 py-2 text-base font-medium text-gray-500 hover:text-gray-800 hover:bg-gray-100">Sign out</a> | |
| </div> | |
| </div> | |
| </div> | |
| </nav> | |
| <!-- Main Content --> | |
| <main class="max-w-7xl mx-auto px-4 sm:px-6 lg:px-8 py-8"> | |
| <!-- Dashboard Tab --> | |
| <div class="tab-content active" id="dashboard-content"> | |
| <div class="mb-8"> | |
| <h1 class="text-3xl font-bold text-gray-900 mb-2">Dashboard</h1> | |
| <p class="text-gray-600">Monitor your crawling activities and system status</p> | |
| </div> | |
| <!-- Four summary cards: active crawls, products collected, scheduled jobs, errors today. | |
| Each label/value pair is a dt/dd inside its own dl so the pairing is valid HTML. | |
| #total-products and #error-count carry ids, so a script outside this markup | |
| presumably rewrites them; the other two values are static text. --> | |
| <div class="grid grid-cols-1 md:grid-cols-2 lg:grid-cols-4 gap-6 mb-8"> | |
| <!-- Stats Cards --> | |
| <div class="bg-white overflow-hidden shadow rounded-lg"> | |
| <div class="px-4 py-5 sm:p-6"> | |
| <div class="flex items-center"> | |
| <div class="flex-shrink-0 bg-blue-500 rounded-md p-3"> | |
| <i class="fas fa-globe text-white text-xl" aria-hidden="true"></i> | |
| </div> | |
| <div class="ml-5 w-0 flex-1"> | |
| <dl> | |
| <dt class="text-sm font-medium text-gray-500 truncate">Active Crawls</dt> | |
| <dd class="flex items-baseline"> | |
| <div class="text-2xl font-semibold text-gray-900">1</div> | |
| </dd> | |
| </dl> | |
| </div> | |
| </div> | |
| </div> | |
| </div> | |
| <div class="bg-white overflow-hidden shadow rounded-lg"> | |
| <div class="px-4 py-5 sm:p-6"> | |
| <div class="flex items-center"> | |
| <div class="flex-shrink-0 bg-green-500 rounded-md p-3"> | |
| <i class="fas fa-database text-white text-xl" aria-hidden="true"></i> | |
| </div> | |
| <div class="ml-5 w-0 flex-1"> | |
| <dl> | |
| <dt class="text-sm font-medium text-gray-500 truncate">Products Collected</dt> | |
| <dd class="flex items-baseline"> | |
| <div class="text-2xl font-semibold text-gray-900" id="total-products">0</div> | |
| </dd> | |
| </dl> | |
| </div> | |
| </div> | |
| </div> | |
| </div> | |
| <div class="bg-white overflow-hidden shadow rounded-lg"> | |
| <div class="px-4 py-5 sm:p-6"> | |
| <div class="flex items-center"> | |
| <div class="flex-shrink-0 bg-yellow-500 rounded-md p-3"> | |
| <i class="fas fa-clock text-white text-xl" aria-hidden="true"></i> | |
| </div> | |
| <div class="ml-5 w-0 flex-1"> | |
| <dl> | |
| <dt class="text-sm font-medium text-gray-500 truncate">Scheduled Jobs</dt> | |
| <dd class="flex items-baseline"> | |
| <div class="text-2xl font-semibold text-gray-900">0</div> | |
| </dd> | |
| </dl> | |
| </div> | |
| </div> | |
| </div> | |
| </div> | |
| <div class="bg-white overflow-hidden shadow rounded-lg"> | |
| <div class="px-4 py-5 sm:p-6"> | |
| <div class="flex items-center"> | |
| <div class="flex-shrink-0 bg-red-500 rounded-md p-3"> | |
| <i class="fas fa-exclamation-triangle text-white text-xl" aria-hidden="true"></i> | |
| </div> | |
| <div class="ml-5 w-0 flex-1"> | |
| <dl> | |
| <dt class="text-sm font-medium text-gray-500 truncate">Errors Today</dt> | |
| <dd class="flex items-baseline"> | |
| <!-- text-gray-900 matches the other three cards; the previous "text-gray-9" is not a Tailwind class. --> | |
| <div class="text-2xl font-semibold text-gray-900" id="error-count">0</div> | |
| </dd> | |
| </dl> | |
| </div> | |
| </div> | |
| </div> | |
| </div> | |
| </div> | |
| <!-- Recent Activity --> | |
| <!-- Card with a heading bar and a list of recent events. --><div class="bg-white shadow rounded-lg mb-8"> | |
| <div class="px-4 py-5 sm:px-6 border-b border-gray-200"> | |
| <h3 class="text-lg leading-6 font-medium text-gray-900">Recent Activity</h3> | |
| </div> | |
| <div class="bg-white overflow-hidden"> | |
| <!-- The markup ships a single static entry. NOTE(review): the id suggests a script adds entries here; confirm against the page script. --><ul class="divide-y divide-gray-200" id="activity-feed"> | |
| <!-- Entry layout: icon badge, title plus detail line, then a right-aligned time label. --><li class="px-6 py-4"> | |
| <div class="flex items-center"> | |
| <div class="flex-shrink-0 bg-blue-100 rounded-md p-2"> | |
| <i class="fas fa-info-circle text-blue-600"></i> | |
| </div> | |
| <div class="ml-4"> | |
| <div class="text-sm font-medium text-gray-900">New Herholdts.co.za crawler template created</div> | |
| <div class="text-sm text-gray-500">Specialized template for product extraction</div> | |
| </div> | |
| <!-- ml-auto pushes the time label to the right edge of the flex row. --><div class="ml-auto text-sm text-gray-500">Just now</div> | |
| </div> | |
| </li> | |
| </ul> | |
| </div> | |
| </div> | |
| <!-- System Status --> | |
| <!-- Card with three usage meters (CPU, memory, disk) and a crawler status line. --><div class="bg-white shadow rounded-lg"> | |
| <div class="px-4 py-5 sm:px-6 border-b border-gray-200"> | |
| <h3 class="text-lg leading-6 font-medium text-gray-900">System Status</h3> | |
| </div> | |
| <div class="px-4 py-5 sm:p-6"> | |
| <div class="grid grid-cols-1 md:grid-cols-3 gap-6"> | |
| <div> | |
| <h4 class="text-sm font-medium text-gray-500 mb-2">CPU Usage</h4> | |
| <!-- Each meter is a grey track with a coloured fill whose width comes from the inline style. --><div class="relative w-full h-4 bg-gray-200 rounded-full overflow-hidden"> | |
| <div class="absolute top-0 left-0 h-full bg-blue-500" id="cpu-bar" style="width: 35%"></div> | |
| </div> | |
| <!-- The span repeats the fill percentage as text; the hardware description after it is static. NOTE(review): bar and span are presumably updated together by a script not shown here. --><div class="mt-1 text-sm text-gray-500"><span id="cpu-percent">35</span>% - 4 cores @ 2.8GHz</div> | |
| </div> | |
| <div> | |
| <h4 class="text-sm font-medium text-gray-500 mb-2">Memory Usage</h4> | |
| <div class="relative w-full h-4 bg-gray-200 rounded-full overflow-hidden"> | |
| <div class="absolute top-0 left-0 h-full bg-green-500" id="memory-bar" style="width: 42%"></div> | |
| </div> | |
| <div class="mt-1 text-sm text-gray-500"><span id="memory-percent">42</span>% - 3.2GB / 7.6GB</div> | |
| </div> | |
| <div> | |
| <h4 class="text-sm font-medium text-gray-500 mb-2">Disk Usage</h4> | |
| <div class="relative w-full h-4 bg-gray-200 rounded-full overflow-hidden"> | |
| <div class="absolute top-0 left-0 h-full bg-yellow-500" id="disk-bar" style="width: 28%"></div> | |
| </div> | |
| <div class="mt-1 text-sm text-gray-500"><span id="disk-percent">28</span>% - 56GB / 200GB</div> | |
| </div> | |
| </div> | |
| <div class="mt-6"> | |
| <h4 class="text-sm font-medium text-gray-500 mb-2">Crawler Status</h4> | |
| <!-- Status panel; the markup shows the "operational" state. NOTE(review): the id suggests its contents are replaced at runtime; confirm. --><div class="bg-gray-50 rounded-md p-4" id="crawler-status"> | |
| <div class="flex items-center"> | |
| <div class="flex-shrink-0 bg-green-100 rounded-full p-2"> | |
| <i class="fas fa-check-circle text-green-600"></i> | |
| </div> | |
| <div class="ml-3"> | |
| <p class="text-sm font-medium text-gray-700">Crawler service is operational</p> | |
| </div> | |
| </div> | |
| </div> | |
| </div> | |
| </div> | |
| </div> | |
| </div> | |
| <!-- New Crawl Tab --> | |
| <div class="tab-content" id="new-crawl-content"> | |
| <div class="mb-8"> | |
| <h1 class="text-3xl font-bold text-gray-900 mb-2">New Web Crawl</h1> | |
| <p class="text-gray-600">Configure and launch a new web crawling job</p> | |
| </div> | |
| <div class="flex flex-col lg:flex-row gap-6"> | |
| <div class="lg:w-2/3"> | |
| <div class="bg-white shadow rounded-lg overflow-hidden"> | |
| <div class="px-4 py-5 sm:px-6 border-b border-gray-200"> | |
| <h3 class="text-lg leading-6 font-medium text-gray-900">Herholdts.co.za Product Crawler</h3> | |
| </div> | |
| <div class="px-4 py-5 sm:p-6"> | |
| <form id="crawl-form"> | |
| <div class="grid grid-cols-1 gap-6"> | |
| <!-- Basic Settings --> | |
| <!-- Target site field: read-only and pre-filled with herholdts.co.za. --><div> | |
| <label for="url" class="block text-sm font-medium text-gray-700">Target Website</label> | |
| <div class="mt-1 flex rounded-md shadow-sm"> | |
| <!-- The scheme is shown as a visual prefix only; it is not part of the input's value. --><span class="inline-flex items-center px-3 rounded-l-md border border-r-0 border-gray-300 bg-gray-50 text-gray-500 text-sm"> | |
| https:// | |
| </span> | |
| <input type="text" name="url" id="url" class="focus:ring-blue-500 focus:border-blue-500 flex-1 block w-full rounded-none rounded-r-md sm:text-sm border-gray-300" value="herholdts.co.za" readonly> | |
| </div> | |
| </div> | |
| <!-- Editable display name for the crawl job, pre-filled with a default. --><div> | |
| <label for="crawl-name" class="block text-sm font-medium text-gray-700">Crawl Name</label> | |
| <input type="text" name="crawl-name" id="crawl-name" class="mt-1 focus:ring-blue-500 focus:border-blue-500 block w-full shadow-sm sm:text-sm border-gray-300 rounded-md" value="Herholdts Products - Full Extraction"> | |
| </div> | |
| <!-- Data Points to Collect --> | |
| <!-- Checkbox group selecting which product fields to extract; all eight start checked. | |
| The group caption is a legend inside a fieldset so it is announced with every checkbox | |
| (a label without "for" names no control). min-w-0 removes the fieldset's default | |
| min-content width so it sizes like the div it replaces. --> | |
| <fieldset class="min-w-0"> | |
| <legend class="block text-sm font-medium text-gray-700 mb-2">Data Points to Collect</legend> | |
| <div class="grid grid-cols-1 sm:grid-cols-2 gap-4"> | |
| <div class="flex items-start"> | |
| <div class="flex items-center h-5"> | |
| <input id="collect-title" name="collect-title" type="checkbox" class="focus:ring-blue-500 h-4 w-4 text-blue-600 border-gray-300 rounded" checked> | |
| </div> | |
| <div class="ml-3 text-sm"> | |
| <label for="collect-title" class="font-medium text-gray-700">Product Title</label> | |
| </div> | |
| </div> | |
| <div class="flex items-start"> | |
| <div class="flex items-center h-5"> | |
| <input id="collect-price" name="collect-price" type="checkbox" class="focus:ring-blue-500 h-4 w-4 text-blue-600 border-gray-300 rounded" checked> | |
| </div> | |
| <div class="ml-3 text-sm"> | |
| <label for="collect-price" class="font-medium text-gray-700">Price</label> | |
| </div> | |
| </div> | |
| <div class="flex items-start"> | |
| <div class="flex items-center h-5"> | |
| <input id="collect-description" name="collect-description" type="checkbox" class="focus:ring-blue-500 h-4 w-4 text-blue-600 border-gray-300 rounded" checked> | |
| </div> | |
| <div class="ml-3 text-sm"> | |
| <label for="collect-description" class="font-medium text-gray-700">Description</label> | |
| </div> | |
| </div> | |
| <div class="flex items-start"> | |
| <div class="flex items-center h-5"> | |
| <input id="collect-images" name="collect-images" type="checkbox" class="focus:ring-blue-500 h-4 w-4 text-blue-600 border-gray-300 rounded" checked> | |
| </div> | |
| <div class="ml-3 text-sm"> | |
| <label for="collect-images" class="font-medium text-gray-700">Images</label> | |
| </div> | |
| </div> | |
| <div class="flex items-start"> | |
| <div class="flex items-center h-5"> | |
| <input id="collect-sku" name="collect-sku" type="checkbox" class="focus:ring-blue-500 h-4 w-4 text-blue-600 border-gray-300 rounded" checked> | |
| </div> | |
| <div class="ml-3 text-sm"> | |
| <label for="collect-sku" class="font-medium text-gray-700">SKU/Code</label> | |
| </div> | |
| </div> | |
| <div class="flex items-start"> | |
| <div class="flex items-center h-5"> | |
| <input id="collect-categories" name="collect-categories" type="checkbox" class="focus:ring-blue-500 h-4 w-4 text-blue-600 border-gray-300 rounded" checked> | |
| </div> | |
| <div class="ml-3 text-sm"> | |
| <label for="collect-categories" class="font-medium text-gray-700">Categories</label> | |
| </div> | |
| </div> | |
| <div class="flex items-start"> | |
| <div class="flex items-center h-5"> | |
| <input id="collect-specs" name="collect-specs" type="checkbox" class="focus:ring-blue-500 h-4 w-4 text-blue-600 border-gray-300 rounded" checked> | |
| </div> | |
| <div class="ml-3 text-sm"> | |
| <label for="collect-specs" class="font-medium text-gray-700">Specifications</label> | |
| </div> | |
| </div> | |
| <div class="flex items-start"> | |
| <div class="flex items-center h-5"> | |
| <input id="collect-availability" name="collect-availability" type="checkbox" class="focus:ring-blue-500 h-4 w-4 text-blue-600 border-gray-300 rounded" checked> | |
| </div> | |
| <div class="ml-3 text-sm"> | |
| <label for="collect-availability" class="font-medium text-gray-700">Availability</label> | |
| </div> | |
| </div> | |
| </div> | |
| </fieldset> | |
| <!-- Crawl Parameters --> | |
| <!-- Numeric crawl limits. Each hint paragraph is linked to its input with | |
| aria-describedby so it is read out together with the field. --> | |
| <div class="border-t border-gray-200 pt-4"> | |
| <h3 class="text-lg font-medium text-gray-900 mb-4">Crawl Parameters</h3> | |
| <div class="mb-4"> | |
| <label for="max-pages" class="block text-sm font-medium text-gray-700">Maximum Pages to Crawl</label> | |
| <!-- min is 0 because the hint documents 0 as the "unlimited" value; with min="1" that value failed native validation. --> | |
| <input type="number" id="max-pages" name="max-pages" min="0" max="1000" value="50" class="mt-1 focus:ring-blue-500 focus:border-blue-500 block w-full shadow-sm sm:text-sm border-gray-300 rounded-md" aria-describedby="max-pages-hint"> | |
| <p class="mt-1 text-sm text-gray-500" id="max-pages-hint">Set to 0 for unlimited (not recommended)</p> | |
| </div> | |
| <div class="mb-4"> | |
| <label for="request-delay" class="block text-sm font-medium text-gray-700">Request Delay (seconds)</label> | |
| <input type="number" id="request-delay" name="request-delay" min="1" max="60" value="3" class="mt-1 focus:ring-blue-500 focus:border-blue-500 block w-full shadow-sm sm:text-sm border-gray-300 rounded-md" aria-describedby="request-delay-hint"> | |
| <p class="mt-1 text-sm text-gray-500" id="request-delay-hint">Higher values reduce server load and detection risk</p> | |
| </div> | |
| <div class="mb-4"> | |
| <label for="timeout" class="block text-sm font-medium text-gray-700">Page Load Timeout (seconds)</label> | |
| <input type="number" id="timeout" name="timeout" min="10" max="120" value="30" class="mt-1 focus:ring-blue-500 focus:border-blue-500 block w-full shadow-sm sm:text-sm border-gray-300 rounded-md" aria-describedby="timeout-hint"> | |
| <p class="mt-1 text-sm text-gray-500" id="timeout-hint">Maximum time to wait for each page to load</p> | |
| </div> | |
| </div> | |
| <!-- Advanced Options --> | |
| <!-- Advanced settings, hidden by default (#advanced-options carries "hidden"). | |
| The toggle button points at the panel through aria-controls; the script that shows | |
| and hides the panel is not part of this markup. The radio group uses | |
| fieldset/legend, and hint paragraphs are linked to their controls with aria-describedby. --> | |
| <div class="border-t border-gray-200 pt-4"> | |
| <div class="flex items-center justify-between"> | |
| <h3 class="text-lg font-medium text-gray-900">Advanced Options</h3> | |
| <button type="button" class="text-blue-600 hover:text-blue-500 text-sm font-medium" id="toggle-advanced" aria-controls="advanced-options"> | |
| Show Advanced | |
| </button> | |
| </div> | |
| <div class="mt-4 space-y-4 hidden" id="advanced-options"> | |
| <!-- Crawl Speed --> | |
| <fieldset class="min-w-0"> | |
| <legend class="block text-sm font-medium text-gray-700">Crawl Speed</legend> | |
| <div class="mt-2"> | |
| <div class="flex items-center space-x-4"> | |
| <div class="flex items-center"> | |
| <input id="speed-slow" name="crawl-speed" type="radio" class="focus:ring-blue-500 h-4 w-4 text-blue-600 border-gray-300" value="slow"> | |
| <label for="speed-slow" class="ml-2 block text-sm text-gray-700">Slow (stealth)</label> | |
| </div> | |
| <div class="flex items-center"> | |
| <input id="speed-medium" name="crawl-speed" type="radio" class="focus:ring-blue-500 h-4 w-4 text-blue-600 border-gray-300" value="medium" checked> | |
| <label for="speed-medium" class="ml-2 block text-sm text-gray-700">Medium</label> | |
| </div> | |
| <div class="flex items-center"> | |
| <input id="speed-fast" name="crawl-speed" type="radio" class="focus:ring-blue-500 h-4 w-4 text-blue-600 border-gray-300" value="fast"> | |
| <label for="speed-fast" class="ml-2 block text-sm text-gray-700">Fast</label> | |
| </div> | |
| </div> | |
| </div> | |
| </fieldset> | |
| <!-- Respect Robots.txt --> | |
| <div class="flex items-start"> | |
| <div class="flex items-center h-5"> | |
| <input id="respect-robots" name="respect-robots" type="checkbox" class="focus:ring-blue-500 h-4 w-4 text-blue-600 border-gray-300 rounded" aria-describedby="respect-robots-hint" checked> | |
| </div> | |
| <div class="ml-3 text-sm"> | |
| <label for="respect-robots" class="font-medium text-gray-700">Respect robots.txt</label> | |
| <p class="text-gray-500" id="respect-robots-hint">Follow the website's crawling rules</p> | |
| </div> | |
| </div> | |
| <!-- JavaScript Rendering --> | |
| <div class="flex items-start"> | |
| <div class="flex items-center h-5"> | |
| <input id="js-rendering" name="js-rendering" type="checkbox" class="focus:ring-blue-500 h-4 w-4 text-blue-600 border-gray-300 rounded" aria-describedby="js-rendering-hint"> | |
| </div> | |
| <div class="ml-3 text-sm"> | |
| <label for="js-rendering" class="font-medium text-gray-700">JavaScript Rendering</label> | |
| <p class="text-gray-500" id="js-rendering-hint">Execute JavaScript on pages (slower but more complete)</p> | |
| </div> | |
| </div> | |
| <!-- Proxy Configuration --> | |
| <div> | |
| <label for="proxy-type" class="block text-sm font-medium text-gray-700">Proxy Configuration</label> | |
| <select id="proxy-type" name="proxy-type" class="mt-1 block w-full pl-3 pr-10 py-2 text-base border-gray-300 focus:outline-none focus:ring-blue-500 focus:border-blue-500 sm:text-sm rounded-md"> | |
| <option value="none" selected>No Proxy</option> | |
| <option value="rotating">Rotating Proxies</option> | |
| <option value="custom">Custom Proxy List</option> | |
| </select> | |
| <!-- Starts hidden. NOTE(review): presumably revealed when "Custom Proxy List" is selected; confirm against the page script. --> | |
| <div id="proxy-custom-container" class="mt-2 hidden"> | |
| <label for="proxy-list" class="block text-sm font-medium text-gray-700">Proxy List (one per line)</label> | |
| <textarea id="proxy-list" name="proxy-list" rows="3" class="mt-1 focus:ring-blue-500 focus:border-blue-500 block w-full shadow-sm sm:text-sm border-gray-300 rounded-md" placeholder="http://username:password@proxy_ip:port"></textarea> | |
| </div> | |
| </div> | |
| <!-- Custom Headers --> | |
| <div> | |
| <label for="custom-headers" class="block text-sm font-medium text-gray-700">Custom Headers (JSON)</label> | |
| <textarea id="custom-headers" name="custom-headers" rows="3" class="mt-1 focus:ring-blue-500 focus:border-blue-500 block w-full shadow-sm sm:text-sm border-gray-300 rounded-md" placeholder='{"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64)", "Accept-Language": "en-US"}' aria-describedby="custom-headers-hint"></textarea> | |
| <p class="mt-1 text-sm text-gray-500" id="custom-headers-hint">Specify custom HTTP headers for requests</p> | |
| </div> | |
| </div> | |
| </div> | |
| </div> | |
| <!-- Form actions, right-aligned: Cancel, then the submit button. --><div class="mt-8 flex justify-end"> | |
| <!-- type="button" keeps Cancel from submitting the form; it has no id or handler in this markup. NOTE(review): confirm whether a script wires it up. --><button type="button" class="bg-white py-2 px-4 border border-gray-300 rounded-md shadow-sm text-sm font-medium text-gray-700 hover:bg-gray-50 focus:outline-none focus:ring-2 focus:ring-offset-2 focus:ring-blue-500"> | |
| Cancel | |
| </button> | |
| <!-- Submits #crawl-form; the form declares no action, so handling is presumably done in script. --><button type="submit" class="ml-3 inline-flex justify-center py-2 px-4 border border-transparent shadow-sm text-sm font-medium rounded-md text-white bg-blue-600 hover:bg-blue-700 focus:outline-none focus:ring-2 focus:ring-offset-2 focus:ring-blue-500" id="start-crawl-btn"> | |
| <i class="fas fa-spider mr-2"></i> Start Crawling | |
| </button> | |
| </div> | |
| </form> | |
| </div> | |
| </div> | |
| </div> | |
| <div class="lg:w-1/3"> | |
| <div class="bg-white shadow rounded-lg overflow-hidden"> | |
| <div class="px-4 py-5 sm:px-6 border-b border-gray-200"> | |
| <h3 class="text-lg leading-6 font-medium text-gray-900">Crawl Preview</h3> | |
| </div> | |
| <div class="px-4 py-5 sm:p-6"> | |
| <!-- Dashed summary box mirroring the form's current settings as label/value rows. --><div class="rounded-lg border-2 border-dashed border-gray-300 p-4"> | |
| <div class="text-center"> | |
| <i class="fas fa-eye text-gray-400 text-3xl mb-2"></i> | |
| <h4 class="text-sm font-medium text-gray-700">Crawl Configuration Preview</h4> | |
| <p class="mt-1 text-sm text-gray-500">Adjust settings to see how they affect your crawl</p> | |
| </div> | |
| <!-- Initial values match the form defaults (50 pages, 3 seconds, no proxy). --><div class="mt-4 space-y-3" id="preview-config"> | |
| <div class="flex justify-between text-sm"> | |
| <span class="text-gray-500">Target URL:</span> | |
| <span class="font-medium text-gray-900">herholdts.co.za</span> | |
| </div> | |
| <div class="flex justify-between text-sm"> | |
| <span class="text-gray-500">Data Points:</span> | |
| <!-- NOTE(review): unlike the rows below, this value has no id, so a script cannot address it directly; "8 selected" may go stale when checkboxes change. Confirm. --><span class="font-medium text-gray-900">8 selected</span> | |
| </div> | |
| <div class="flex justify-between text-sm"> | |
| <span class="text-gray-500">Max Pages:</span> | |
| <span class="font-medium text-gray-900" id="preview-max-pages">50</span> | |
| </div> | |
| <div class="flex justify-between text-sm"> | |
| <span class="text-gray-500">Request Delay:</span> | |
| <span class="font-medium text-gray-900" id="preview-delay">3 seconds</span> | |
| </div> | |
| <div class="flex justify-between text-sm"> | |
| <span class="text-gray-500">Proxy:</span> | |
| <span class="font-medium text-gray-900" id="preview-proxy">None</span> | |
| </div> | |
| </div> | |
| </div> | |
| <div class="mt-6"> | |
| <h4 class="text-sm font-medium text-gray-700 mb-2">Sample Output Structure</h4> | |
| <div class="bg-gray-50 p-3 rounded-md text-xs font-mono overflow-x-auto custom-scrollbar" style="max-height: 200px;"> | |
| <pre>{ | |
| "products": [ | |
| { | |
| "title": "Product Name", | |
| "price": "R1,299.00", | |
| "original_price": "R1,499.00", | |
| "description": "Detailed product description...", | |
| "images": [ | |
| "https://herholdts.co.za/image1.jpg", | |
| "https://herholdts.co.za/image2.jpg" | |
| ], | |
| "sku": "PROD12345", | |
| "categories": ["Category 1", "Subcategory 1"], | |
| "specifications": { | |
| "Material": "Stainless Steel", | |
| "Dimensions": "30 x 20 x 15 cm", | |
| "Weight": "1.5 kg" | |
| }, | |
| "availability": "In Stock", | |
| "url": "https://herholdts.co.za/product-url" | |
| } | |
| ], | |
| "metadata": { | |
| "crawl_date": "2023-06-20", | |
| "pages_crawled": 24, | |
| "products_found": | |
| </html> |