<!--
  webscrap / index.html
  amrashour3333's picture
  Add 3 files
  3f13a12 verified
-->
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>WebScraper Pro - Chrome Extension</title>
<script src="https://cdn.tailwindcss.com"></script>
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.0/css/all.min.css">
<style>
  /* Custom scrollbar (WebKit/Blink only) */
  ::-webkit-scrollbar {
    width: 8px;
  }
  ::-webkit-scrollbar-track {
    background: #f1f1f1;
  }
  ::-webkit-scrollbar-thumb {
    background: #888;
    border-radius: 4px;
  }
  ::-webkit-scrollbar-thumb:hover {
    background: #555;
  }

  /* Animation for loading */
  @keyframes pulse {
    0%, 100% {
      opacity: 1;
    }
    50% {
      opacity: 0.5;
    }
  }
  .animate-pulse {
    animation: pulse 1.5s cubic-bezier(0.4, 0, 0.6, 1) infinite;
  }

  /* Custom switch toggle: a visually hidden checkbox drives the .slider track */
  .switch {
    position: relative;
    display: inline-block;
    width: 50px;
    height: 24px;
  }
  /* The real checkbox stays in the tab order but is invisible */
  .switch input {
    opacity: 0;
    width: 0;
    height: 0;
  }
  .slider {
    position: absolute;
    cursor: pointer;
    top: 0;
    left: 0;
    right: 0;
    bottom: 0;
    background-color: #ccc;
    transition: .4s;
    border-radius: 24px;
  }
  /* The knob */
  .slider::before {
    position: absolute;
    content: "";
    height: 16px;
    width: 16px;
    left: 4px;
    bottom: 4px;
    background-color: white;
    transition: .4s;
    border-radius: 50%;
  }
  input:checked + .slider {
    background-color: #3b82f6;
  }
  input:checked + .slider::before {
    transform: translateX(26px);
  }
  /* Because the checkbox itself is invisible, show keyboard focus on the track
     so keyboard users can see where they are. */
  .switch input:focus-visible + .slider {
    outline: 2px solid #1d4ed8;
    outline-offset: 2px;
  }
</style>
</head>
<body class="bg-gray-100 min-h-screen">
<div class="container mx-auto px-4 py-8 max-w-6xl">
<!-- Header -->
<header class="bg-gradient-to-r from-blue-600 to-indigo-800 rounded-xl shadow-lg p-6 mb-8 text-white">
  <div class="flex flex-col md:flex-row justify-between items-center">
    <div class="flex items-center mb-4 md:mb-0">
      <!-- Icons in this header are decorative; the adjacent text carries the meaning. -->
      <i class="fas fa-spider text-3xl mr-3" aria-hidden="true"></i>
      <h1 class="text-3xl font-bold">WebScraper Pro</h1>
    </div>
    <div class="flex space-x-4">
      <!-- The script disables this button while a scrape is running; the disabled: classes make that visible. -->
      <button id="startScraping" type="button" class="bg-white text-blue-600 hover:bg-blue-50 px-4 py-2 rounded-lg font-semibold flex items-center disabled:opacity-50 disabled:cursor-not-allowed">
        <i class="fas fa-play mr-2" aria-hidden="true"></i> Start Scraping
      </button>
      <!-- Starts disabled because nothing is running on page load; the script enables it when scraping starts. -->
      <button id="stopScraping" type="button" class="bg-red-500 hover:bg-red-600 text-white px-4 py-2 rounded-lg font-semibold flex items-center disabled:opacity-50 disabled:cursor-not-allowed" disabled>
        <i class="fas fa-stop mr-2" aria-hidden="true"></i> Stop
      </button>
    </div>
  </div>
  <p class="mt-4 text-blue-100">Advanced web scraping tool with AI integration and comprehensive data collection</p>
</header>
<!-- Main Content -->
<div class="grid grid-cols-1 lg:grid-cols-3 gap-6">
<!-- Left Sidebar - Configuration -->
<div class="lg:col-span-1 bg-white rounded-xl shadow-md p-6">
  <!-- Configuration sidebar. Icons are decorative and hidden from assistive technology. -->
  <h2 class="text-xl font-bold mb-4 text-gray-800 border-b pb-2 flex items-center">
    <i class="fas fa-cog mr-2 text-blue-500" aria-hidden="true"></i> Configuration
  </h2>
  <!-- Scraping Options: the checkbox group is named by its heading via aria-labelledby -->
  <div class="mb-6">
    <h3 id="dataToCollectHeading" class="font-semibold mb-2 text-gray-700 flex items-center">
      <i class="fas fa-check-circle mr-2 text-green-500" aria-hidden="true"></i> Data to Collect
    </h3>
    <div class="space-y-2" role="group" aria-labelledby="dataToCollectHeading">
      <div class="flex items-center">
        <input type="checkbox" id="collectText" class="mr-2" checked>
        <label for="collectText">All Text Content</label>
      </div>
      <div class="flex items-center">
        <input type="checkbox" id="collectLinks" class="mr-2" checked>
        <label for="collectLinks">Links (a tags)</label>
      </div>
      <div class="flex items-center">
        <input type="checkbox" id="collectImages" class="mr-2" checked>
        <label for="collectImages">Images</label>
      </div>
      <div class="flex items-center">
        <input type="checkbox" id="collectTables" class="mr-2" checked>
        <label for="collectTables">Tables</label>
      </div>
      <div class="flex items-center">
        <input type="checkbox" id="collectMeta" class="mr-2" checked>
        <label for="collectMeta">Meta Tags</label>
      </div>
      <div class="flex items-center">
        <input type="checkbox" id="collectForms" class="mr-2" checked>
        <label for="collectForms">Forms</label>
      </div>
    </div>
  </div>
  <!-- Advanced Options -->
  <div class="mb-6">
    <h3 class="font-semibold mb-2 text-gray-700 flex items-center">
      <i class="fas fa-sliders-h mr-2 text-purple-500" aria-hidden="true"></i> Advanced Options
    </h3>
    <div class="space-y-3">
      <div>
        <label for="depth" class="block text-sm font-medium text-gray-700 mb-1">Scraping Depth</label>
        <select id="depth" class="w-full p-2 border rounded-lg">
          <option value="1">Current Page Only</option>
          <option value="2">1 Level Deep</option>
          <option value="3">2 Levels Deep</option>
          <option value="0">All Links (Careful!)</option>
        </select>
      </div>
      <div>
        <label for="delay" class="block text-sm font-medium text-gray-700 mb-1">Delay Between Requests (ms)</label>
        <input type="number" id="delay" value="1000" min="0" class="w-full p-2 border rounded-lg">
      </div>
      <div class="flex items-center justify-between">
        <label for="headless" class="text-sm font-medium text-gray-700">Headless Mode</label>
        <!-- The wrapping label makes the whole slider track clickable -->
        <label class="switch">
          <input type="checkbox" id="headless">
          <span class="slider"></span>
        </label>
      </div>
    </div>
  </div>
  <!-- Export Options: explicit type="button" so these never act as submit buttons -->
  <div>
    <h3 class="font-semibold mb-2 text-gray-700 flex items-center">
      <i class="fas fa-file-export mr-2 text-yellow-500" aria-hidden="true"></i> Export Options
    </h3>
    <div class="flex space-x-2">
      <button id="exportJSON" type="button" class="bg-gray-200 hover:bg-gray-300 px-3 py-1 rounded-lg text-sm flex items-center">
        <i class="fas fa-file-code mr-1" aria-hidden="true"></i> JSON
      </button>
      <button id="exportCSV" type="button" class="bg-gray-200 hover:bg-gray-300 px-3 py-1 rounded-lg text-sm flex items-center">
        <i class="fas fa-file-csv mr-1" aria-hidden="true"></i> CSV
      </button>
      <button id="exportExcel" type="button" class="bg-gray-200 hover:bg-gray-300 px-3 py-1 rounded-lg text-sm flex items-center">
        <i class="fas fa-file-excel mr-1" aria-hidden="true"></i> Excel
      </button>
    </div>
  </div>
</div>
<!-- Main Panel - Results and AI -->
<div class="lg:col-span-2 space-y-6">
<!-- Search Panel -->
<div class="bg-white rounded-xl shadow-md p-6">
  <h2 class="text-xl font-bold mb-4 text-gray-800 border-b pb-2 flex items-center">
    <i class="fas fa-search mr-2 text-green-500" aria-hidden="true"></i> Web Search
  </h2>
  <div class="flex">
    <!-- A placeholder is not a label: both controls get a real (visually hidden) label.
         sr-only is absolutely positioned, so the flex layout is unchanged. -->
    <label for="searchQuery" class="sr-only">Search query</label>
    <input type="text" id="searchQuery" placeholder="Enter your search query..." class="flex-grow p-3 border rounded-l-lg focus:outline-none focus:ring-2 focus:ring-blue-500">
    <label for="searchEngine" class="sr-only">Search engine</label>
    <select id="searchEngine" class="border-t border-b border-r p-3 rounded-r-lg bg-gray-50">
      <option value="google">Google</option>
      <option value="bing">Bing</option>
      <option value="duckduckgo">DuckDuckGo</option>
      <option value="youtube">YouTube</option>
    </select>
    <button id="performSearch" type="button" class="ml-2 bg-blue-600 hover:bg-blue-700 text-white px-4 py-3 rounded-lg font-semibold">
      <i class="fas fa-search mr-1" aria-hidden="true"></i> Search
    </button>
  </div>
  <div id="searchResults" class="mt-4 hidden">
    <h3 class="font-semibold mb-2">Search Results</h3>
    <!-- The script replaces this container's content; aria-live announces the update.
         It must stay the first div nested in a div under #searchResults,
         because the script finds it with querySelector('div > div'). -->
    <div class="border rounded-lg p-3 max-h-60 overflow-y-auto" aria-live="polite">
      <div class="text-center py-4 text-gray-500">
        <i class="fas fa-spinner fa-spin mr-2" aria-hidden="true"></i> Loading results...
      </div>
    </div>
  </div>
</div>
<!-- AI Integration Panel -->
<div class="bg-white rounded-xl shadow-md p-6">
  <h2 class="text-xl font-bold mb-4 text-gray-800 border-b pb-2 flex items-center">
    <i class="fas fa-robot mr-2 text-purple-500" aria-hidden="true"></i> AI Analysis
  </h2>
  <div class="mb-4">
    <label for="aiModel" class="block text-sm font-medium text-gray-700 mb-1">AI Model</label>
    <select id="aiModel" class="w-full p-2 border rounded-lg">
      <option value="gpt-4">GPT-4</option>
      <option value="gpt-3.5">GPT-3.5</option>
      <option value="claude">Claude</option>
      <option value="bard">Bard</option>
      <option value="llama">Llama 2</option>
    </select>
  </div>
  <div class="mb-4">
    <label for="apiKey" class="block text-sm font-medium text-gray-700 mb-1">API Key</label>
    <div class="flex">
      <!-- A secret: keep it out of autofill and spellcheck -->
      <input type="password" id="apiKey" placeholder="Enter your API key" autocomplete="off" spellcheck="false" class="flex-grow p-2 border rounded-l-lg">
      <!-- Icon-only button, so it needs an accessible name. The script swaps the inner <i> classes. -->
      <button id="toggleKeyVisibility" type="button" class="bg-gray-200 hover:bg-gray-300 px-3 border-t border-b border-r rounded-r-lg" aria-label="Toggle API key visibility">
        <i class="fas fa-eye" aria-hidden="true"></i>
      </button>
    </div>
  </div>
  <div class="mb-4">
    <label for="aiPrompt" class="block text-sm font-medium text-gray-700 mb-1">Prompt</label>
    <textarea id="aiPrompt" rows="3" class="w-full p-2 border rounded-lg" placeholder="What would you like the AI to analyze?"></textarea>
  </div>
  <div class="flex justify-between">
    <button id="analyzeData" type="button" class="bg-purple-600 hover:bg-purple-700 text-white px-4 py-2 rounded-lg font-semibold flex items-center">
      <i class="fas fa-brain mr-2" aria-hidden="true"></i> Analyze Data
    </button>
    <button id="clearAnalysis" type="button" class="bg-gray-200 hover:bg-gray-300 px-4 py-2 rounded-lg font-semibold">
      Clear
    </button>
  </div>
  <div id="aiResponse" class="mt-4 hidden">
    <h3 class="font-semibold mb-2">AI Response</h3>
    <!-- The script replaces this container's content; aria-live announces the update.
         It must stay the first div nested in a div under #aiResponse,
         because the script finds it with querySelector('div > div'). -->
    <div class="border rounded-lg p-3 bg-gray-50 min-h-32 max-h-64 overflow-y-auto" aria-live="polite">
      <div class="text-center py-4 text-gray-500">
        <i class="fas fa-comment-dots" aria-hidden="true"></i> AI response will appear here
      </div>
    </div>
  </div>
</div>
<!-- Results Panel -->
<div class="bg-white rounded-xl shadow-md p-6">
  <h2 class="text-xl font-bold mb-4 text-gray-800 border-b pb-2 flex items-center">
    <i class="fas fa-database mr-2 text-blue-500" aria-hidden="true"></i> Scraping Results
  </h2>
  <div class="flex justify-between items-center mb-4">
    <div class="text-sm text-gray-600">
      <!-- The script writes the current row count into #resultCount -->
      <span id="resultCount">0</span> items collected
    </div>
    <div class="flex space-x-2">
      <button id="clearResults" type="button" class="bg-gray-200 hover:bg-gray-300 px-3 py-1 rounded-lg text-sm">
        Clear Results
      </button>
      <button id="filterResults" type="button" class="bg-gray-200 hover:bg-gray-300 px-3 py-1 rounded-lg text-sm flex items-center">
        <i class="fas fa-filter mr-1" aria-hidden="true"></i> Filter
      </button>
    </div>
  </div>
  <div id="scrapingResults" class="border rounded-lg overflow-hidden">
    <div class="overflow-x-auto">
      <table class="min-w-full divide-y divide-gray-200">
        <!-- Visually hidden caption gives the table an accessible name -->
        <caption class="sr-only">Scraped items with their type, content, and source page</caption>
        <thead class="bg-gray-50">
          <tr>
            <th scope="col" class="px-6 py-3 text-left text-xs font-medium text-gray-500 uppercase tracking-wider">Type</th>
            <th scope="col" class="px-6 py-3 text-left text-xs font-medium text-gray-500 uppercase tracking-wider">Content</th>
            <th scope="col" class="px-6 py-3 text-left text-xs font-medium text-gray-500 uppercase tracking-wider">Source</th>
          </tr>
        </thead>
        <!-- Rows are replaced by the script; this placeholder row is the empty state -->
        <tbody id="resultsTableBody" class="bg-white divide-y divide-gray-200">
          <tr>
            <td colspan="3" class="px-6 py-4 text-center text-gray-500">
              No data collected yet. Start scraping to see results.
            </td>
          </tr>
        </tbody>
      </table>
    </div>
  </div>
  <!-- Shown and hidden by the script while a scrape is running; role="status" exposes it as a polite live region -->
  <div id="loadingIndicator" class="mt-4 hidden text-center" role="status">
    <div class="inline-flex items-center px-4 py-2 bg-blue-100 text-blue-700 rounded-lg">
      <i class="fas fa-spinner fa-spin mr-2" aria-hidden="true"></i> Scraping in progress...
    </div>
  </div>
</div>
</div>
</div>
<!-- Footer -->
<footer class="mt-8 text-center text-gray-600 text-sm">
<p>WebScraper Pro Chrome Extension - Collect and analyze web data with AI</p>
<!-- NOTE(review): both links below use href="#" placeholders and navigate nowhere; replace them with the real documentation and support URLs once they exist. -->
<p class="mt-1">Version 1.0.0 | <a href="#" class="text-blue-600 hover:underline">Documentation</a> | <a href="#" class="text-blue-600 hover:underline">Support</a></p>
</footer>
</div>
<script>
// ---- Cached DOM references (looked up once when the script runs) ----
// Scraping controls
const startScrapingBtn = document.querySelector('#startScraping');
const stopScrapingBtn = document.querySelector('#stopScraping');
// Web search panel
const searchQueryInput = document.querySelector('#searchQuery');
const searchEngineSelect = document.querySelector('#searchEngine');
const performSearchBtn = document.querySelector('#performSearch');
const searchResultsDiv = document.querySelector('#searchResults');
// AI analysis panel
const aiModelSelect = document.querySelector('#aiModel');
const apiKeyInput = document.querySelector('#apiKey');
const toggleKeyVisibilityBtn = document.querySelector('#toggleKeyVisibility');
const aiPromptTextarea = document.querySelector('#aiPrompt');
const analyzeDataBtn = document.querySelector('#analyzeData');
const clearAnalysisBtn = document.querySelector('#clearAnalysis');
const aiResponseDiv = document.querySelector('#aiResponse');
// Results panel
const resultCountSpan = document.querySelector('#resultCount');
const clearResultsBtn = document.querySelector('#clearResults');
const filterResultsBtn = document.querySelector('#filterResults');
const resultsTableBody = document.querySelector('#resultsTableBody');
const loadingIndicator = document.querySelector('#loadingIndicator');
// Export buttons
const exportJSONBtn = document.querySelector('#exportJSON');
const exportCSVBtn = document.querySelector('#exportCSV');
const exportExcelBtn = document.querySelector('#exportExcel');

// ---- In-memory state shared by the handlers ----
let scrapedData = [];   // rows rendered in the results table and used by the exporters
let searchResults = []; // results of the most recent simulated search
let aiResponses = [];   // every AI response generated so far

// ---- Click wiring: [button, handler] pairs, registered in this order ----
[
  [startScrapingBtn, startScraping],
  [stopScrapingBtn, stopScraping],
  [performSearchBtn, performSearch],
  [toggleKeyVisibilityBtn, toggleKeyVisibility],
  [analyzeDataBtn, analyzeData],
  [clearAnalysisBtn, clearAnalysis],
  [clearResultsBtn, clearResults],
  [filterResultsBtn, filterResults],
  [exportJSONBtn, exportToJSON],
  [exportCSVBtn, exportToCSV],
  [exportExcelBtn, exportToExcel]
].forEach(([button, handler]) => {
  button.addEventListener('click', handler);
});
// Functions
// Handle of the pending simulated-scrape timer; null while no scrape is running.
let scrapeTimerId = null;

/**
 * Starts the (simulated) scrape: shows the loading indicator, disables Start,
 * enables Stop, and after 3 seconds fills the results table with sample data.
 *
 * The timer handle is kept so stopScraping() can actually cancel the run.
 * A call made while a run is already pending is ignored.
 */
function startScraping() {
  if (scrapeTimerId !== null) {
    return;
  }
  loadingIndicator.classList.remove('hidden');
  startScrapingBtn.disabled = true;
  stopScrapingBtn.disabled = false;
  // Simulate scraping process
  scrapeTimerId = setTimeout(() => {
    scrapeTimerId = null;
    // Generate sample data
    scrapedData = generateSampleData();
    updateResultsTable();
    loadingIndicator.classList.add('hidden');
    startScrapingBtn.disabled = false;
    // Nothing is left to stop once the run has finished.
    stopScrapingBtn.disabled = true;
  }, 3000);
}

/**
 * Cancels a pending scrape, if any, and restores the idle UI state
 * (indicator hidden, Start enabled, Stop disabled).
 *
 * Clearing the timer is what prevents results from appearing after Stop.
 */
function stopScraping() {
  if (scrapeTimerId !== null) {
    clearTimeout(scrapeTimerId);
    scrapeTimerId = null;
  }
  loadingIndicator.classList.add('hidden');
  startScrapingBtn.disabled = false;
  stopScrapingBtn.disabled = true;
  // In a real extension, this would stop the scraping process
  console.log("Scraping stopped");
}
/**
 * Validates the search query, reveals the results panel with a progress
 * message, and after a simulated 1.5 s delay stores and renders generated results.
 *
 * The query is user input, so the progress message is assembled from DOM
 * nodes and a text node instead of being concatenated into an HTML string.
 * That way markup typed into the search box is shown literally, not parsed.
 */
function performSearch() {
  const query = searchQueryInput.value.trim();
  const engine = searchEngineSelect.value;
  if (!query) {
    alert("Please enter a search query");
    return;
  }
  searchResultsDiv.classList.remove('hidden');
  const resultsContainer = searchResultsDiv.querySelector('div > div');

  // Progress row: spinner icon followed by plain text.
  const status = document.createElement('div');
  status.className = 'text-center py-4 text-gray-500';
  const spinner = document.createElement('i');
  spinner.className = 'fas fa-spinner fa-spin mr-2';
  status.appendChild(spinner);
  status.appendChild(document.createTextNode(' Searching ' + engine + ' for: ' + query));

  resultsContainer.innerHTML = '';
  resultsContainer.appendChild(status);

  // Simulate search results
  setTimeout(() => {
    searchResults = generateSearchResults(query, engine);
    displaySearchResults();
  }, 1500);
}
/**
 * Renders the current `searchResults` array into the search results container,
 * or a "No results found" message when the array is empty.
 *
 * Titles, URLs and snippets contain the user's query, so each is written with
 * textContent. They are never interpolated into an HTML string.
 */
function displaySearchResults() {
  const resultsContainer = searchResultsDiv.querySelector('div > div');
  resultsContainer.innerHTML = '';
  if (searchResults.length === 0) {
    // Static markup only, so innerHTML is safe here.
    resultsContainer.innerHTML = '<div class="text-center py-4 text-gray-500">No results found</div>';
    return;
  }

  // Creates <tag class="className">text</tag> under parent, with text treated as plain text.
  const appendTextElement = (parent, tag, className, text) => {
    const el = document.createElement(tag);
    el.className = className;
    el.textContent = text;
    parent.appendChild(el);
  };

  searchResults.forEach(result => {
    const resultDiv = document.createElement('div');
    resultDiv.className = 'mb-3 pb-3 border-b last:border-b-0';
    appendTextElement(resultDiv, 'h4', 'font-medium text-blue-600 hover:underline cursor-pointer', result.title);
    appendTextElement(resultDiv, 'p', 'text-sm text-gray-600', result.url);
    appendTextElement(resultDiv, 'p', 'text-sm mt-1 text-gray-700', result.snippet);
    resultsContainer.appendChild(resultDiv);
  });
}
/**
 * Switches the API key field between masked ("password") and plain ("text")
 * and swaps the button's eye / eye-slash icon to match.
 */
function toggleKeyVisibility() {
  const eyeIcon = toggleKeyVisibilityBtn.querySelector('i');
  // True when the key is currently masked and is about to be revealed.
  const revealing = apiKeyInput.type === 'password';

  apiKeyInput.type = revealing ? 'text' : 'password';
  eyeIcon.classList.remove(revealing ? 'fa-eye' : 'fa-eye-slash');
  eyeIcon.classList.add(revealing ? 'fa-eye-slash' : 'fa-eye');
}
/**
 * Validates the API key and prompt, shows a progress message in the AI
 * response panel, and after a simulated 2.5 s delay records and displays a
 * generated response.
 *
 * The generated response quotes the user's prompt, so it is rendered with
 * textContent (as is the model badge) and never injected as HTML.
 */
function analyzeData() {
  const apiKey = apiKeyInput.value.trim();
  const model = aiModelSelect.value;
  const prompt = aiPromptTextarea.value.trim();
  if (!apiKey) {
    alert("Please enter your API key");
    return;
  }
  if (!prompt) {
    alert("Please enter a prompt for the AI");
    return;
  }
  aiResponseDiv.classList.remove('hidden');
  const responseContainer = aiResponseDiv.querySelector('div > div');

  // Progress row: spinner icon followed by plain text.
  const status = document.createElement('div');
  status.className = 'text-center py-4 text-gray-500';
  const spinner = document.createElement('i');
  spinner.className = 'fas fa-spinner fa-spin mr-2';
  status.appendChild(spinner);
  status.appendChild(document.createTextNode(' Analyzing with ' + model + '...'));
  responseContainer.innerHTML = '';
  responseContainer.appendChild(status);

  // Simulate AI response
  setTimeout(() => {
    const aiResponse = generateAIResponse(prompt, model);
    aiResponses.push(aiResponse);

    // Model badge
    const badgeRow = document.createElement('div');
    badgeRow.className = 'mb-2';
    const badge = document.createElement('span');
    badge.className = 'bg-blue-100 text-blue-800 text-xs font-medium px-2.5 py-0.5 rounded';
    badge.textContent = model;
    badgeRow.appendChild(badge);

    // Response body, as plain text
    const body = document.createElement('div');
    body.className = 'prose max-w-none';
    body.textContent = aiResponse;

    responseContainer.innerHTML = '';
    responseContainer.appendChild(badgeRow);
    responseContainer.appendChild(body);
  }, 2500);
}
/**
 * Resets the AI panel: empties the prompt box and hides the response area.
 */
function clearAnalysis() {
  aiPromptTextarea.value = '';
  // Forcing the class on is the same as classList.add('hidden').
  aiResponseDiv.classList.toggle('hidden', true);
}
/**
 * Discards all scraped rows and re-renders the table, which then shows
 * its empty-state row and a count of 0.
 */
function clearResults() {
  scrapedData = new Array(0);
  updateResultsTable();
}
/**
 * Placeholder for result filtering: it only tells the user that the
 * feature is not implemented.
 */
function filterResults() {
  const notice = 'Filter functionality would be implemented here';
  alert(notice);
}
/**
 * Re-renders the results table and the item counter from `scrapedData`.
 *
 * Scraped content is untrusted markup from other sites. It is shown as plain
 * text via textContent, so it can never execute, load remote resources, or
 * render forms in this page. This also makes the 100-character truncation
 * safe: cutting text in the middle of a tag can no longer corrupt the row.
 */
function updateResultsTable() {
  resultCountSpan.textContent = scrapedData.length;
  if (scrapedData.length === 0) {
    // Static empty-state markup only, so innerHTML is safe here.
    resultsTableBody.innerHTML = `
<tr>
<td colspan="3" class="px-6 py-4 text-center text-gray-500">
No data collected yet. Start scraping to see results.
</td>
</tr>
`;
    return;
  }

  // Appends a <td> with the given classes whose content is plain text.
  const appendCell = (row, className, text) => {
    const cell = document.createElement('td');
    cell.className = className;
    cell.textContent = text;
    row.appendChild(cell);
  };

  resultsTableBody.innerHTML = '';
  scrapedData.forEach(item => {
    const row = document.createElement('tr');
    // Truncate content for display; coerce first so a missing or non-string value cannot throw.
    let displayContent = String(item.content == null ? '' : item.content);
    if (displayContent.length > 100) {
      displayContent = displayContent.substring(0, 100) + '...';
    }
    appendCell(row, 'px-6 py-4 whitespace-nowrap text-sm font-medium text-gray-900', item.type);
    appendCell(row, 'px-6 py-4 text-sm text-gray-500', displayContent);
    appendCell(row, 'px-6 py-4 text-sm text-gray-500', item.source);
    resultsTableBody.appendChild(row);
  });
}
/**
 * Downloads the scraped rows as pretty-printed JSON (2-space indent).
 * Alerts and does nothing when there are no rows.
 */
function exportToJSON() {
  const hasRows = scrapedData.length !== 0;
  if (!hasRows) {
    alert("No data to export");
    return;
  }
  downloadFile(
    JSON.stringify(scrapedData, null, 2),
    'webscraper-data.json',
    'application/json'
  );
}
/**
 * Downloads the scraped rows as CSV with a "Type,Content,Source" header.
 * Alerts and does nothing when there are no rows.
 *
 * Every field is wrapped in double quotes with embedded quotes doubled.
 * This applies to type and source as well as content, so a quote in any
 * column cannot break the row. Values are coerced to strings first, so a
 * missing or non-string field exports as text instead of throwing.
 */
function exportToCSV() {
  if (scrapedData.length === 0) {
    alert("No data to export");
    return;
  }
  // Quote one CSV field; null/undefined become an empty field.
  const quoteField = (value) =>
    '"' + String(value == null ? '' : value).replace(/"/g, '""') + '"';

  let csv = 'Type,Content,Source\n';
  scrapedData.forEach(item => {
    csv += [item.type, item.content, item.source].map(quoteField).join(',') + '\n';
  });
  downloadFile(csv, 'webscraper-data.csv', 'text/csv');
}
/**
 * Placeholder for Excel export: it only tells the user that the
 * feature is not implemented.
 */
function exportToExcel() {
  const notice = 'In a real implementation, this would export to Excel format';
  alert(notice);
}
/**
 * Triggers a browser download of `content` as a file.
 *
 * @param {string} content      File body.
 * @param {string} fileName     Suggested name for the downloaded file.
 * @param {string} contentType  MIME type given to the Blob.
 */
function downloadFile(content, fileName, contentType) {
  const objectUrl = URL.createObjectURL(new Blob([content], { type: contentType }));

  // Temporary anchor: clicking it starts the download.
  const link = document.createElement('a');
  link.href = objectUrl;
  link.download = fileName;
  document.body.appendChild(link);
  link.click();

  // Clean up shortly afterwards: drop the anchor and release the object URL.
  const cleanUp = () => {
    document.body.removeChild(link);
    URL.revokeObjectURL(objectUrl);
  };
  setTimeout(cleanUp, 100);
}
// Helper functions to generate sample data
/**
 * Builds a random list of fake scraped items for the demo.
 *
 * @returns {Array<{type: string, content: string, source: string}>}
 *   Between 5 and 19 items. Non-text types carry a small HTML snippet as
 *   content; 'Text' items carry one of the sample sentences.
 */
function generateSampleData() {
  const types = ['Text', 'Link', 'Image', 'Table', 'Meta Tag', 'Form'];
  const sources = ['https://example.com', 'https://test.org', 'https://demo.net'];
  const sampleTexts = [
    'This is some sample text content that would be scraped from a webpage.',
    'Another piece of content that might be interesting to collect.',
    'Web scraping involves extracting data from websites for analysis.',
    'Contact us for more information about our products and services.',
    'Copyright © 2023 Example Company. All rights reserved.'
  ];

  // Uniform random pick from a list (one Math.random() call per pick).
  const pickOne = (list) => list[Math.floor(Math.random() * list.length)];

  const itemCount = Math.floor(Math.random() * 15) + 5;
  const data = [];
  for (let n = 0; n < itemCount; n += 1) {
    const type = pickOne(types);
    const source = pickOne(sources);
    let content;
    switch (type) {
      case 'Link':
        content = `<a href="${source}/page${n}">Link ${n}</a>`;
        break;
      case 'Image':
        content = `<img src="${source}/image${n}.jpg" alt="Sample image">`;
        break;
      case 'Table':
        content = '<table><tr><th>Header</th></tr><tr><td>Data</td></tr></table>';
        break;
      case 'Meta Tag':
        content = `<meta name="description" content="${pickOne(sampleTexts)}">`;
        break;
      case 'Form':
        content = '<form><input type="text" name="username"><button>Submit</button></form>';
        break;
      default:
        // 'Text'
        content = pickOne(sampleTexts);
    }
    data.push({ type, content, source });
  }
  return data;
}
/**
 * Produces 3 to 7 fake search results for the demo.
 *
 * @param {string} query   The search text; URL-encoded inside each result URL.
 * @param {string} engine  Engine key, used as the host name and in the text.
 * @returns {Array<{title: string, url: string, snippet: string}>}
 *   Results numbered from 1.
 */
function generateSearchResults(query, engine) {
  const total = Math.floor(Math.random() * 5) + 3;
  const encodedQuery = encodeURIComponent(query);

  return Array.from({ length: total }, (_, idx) => {
    const rank = idx + 1;
    return {
      title: `${query} result ${rank} from ${engine}`,
      url: `https://${engine}.com/search?q=${encodedQuery}&result=${rank}`,
      snippet: `This is a sample snippet for result ${rank} about ${query}. The ${engine} search engine found this relevant content.`
    };
  });
}
/**
 * Returns one of four canned "analysis" paragraphs, chosen at random, with
 * the prompt and/or model name substituted in.
 *
 * @param {string} prompt  The user's prompt, quoted inside the response.
 * @param {string} model   Model key, mentioned by two of the templates.
 * @returns {string} The chosen response text.
 */
function generateAIResponse(prompt, model) {
  // Each template is built only if it is the one selected.
  const templates = [
    () => `Based on the scraped data and your prompt "${prompt}", I've analyzed the content and found several key patterns. The data suggests that most content falls into categories of informational text, navigation links, and media elements. This is typical for modern websites.`,
    () => `Analysis of "${prompt}" reveals that the scraped data contains structured information that could be valuable for your research. The ${model} model identifies semantic relationships between different content types.`,
    () => `The prompt "${prompt}" aligns well with the collected data. My analysis shows that 78% of content is textual, 15% is navigational, and 7% is media. This distribution suggests an information-rich website.`,
    () => `Using ${model}, I've processed your request about "${prompt}". The results indicate strong thematic consistency across the scraped pages, with recurring topics and a clear information hierarchy.`
  ];
  const chosen = Math.floor(Math.random() * templates.length);
  return templates[chosen]();
}
</script>
<!-- DeepSite attribution badge. Links that open a new tab carry rel="noopener noreferrer"; the logo declares its size to reserve layout space. -->
<p style="border-radius: 8px; text-align: center; font-size: 12px; color: #fff; margin-top: 16px;position: fixed; left: 8px; bottom: 8px; z-index: 10; background: rgba(0, 0, 0, 0.8); padding: 4px 8px;">Made with <img src="https://enzostvs-deepsite.hf.space/logo.svg" alt="DeepSite Logo" width="16" height="16" style="width: 16px; height: 16px; vertical-align: middle;display:inline-block;margin-right:3px;filter:brightness(0) invert(1);"><a href="https://enzostvs-deepsite.hf.space" style="color: #fff;text-decoration: underline;" target="_blank" rel="noopener noreferrer">DeepSite</a> - 🧬 <a href="https://enzostvs-deepsite.hf.space?remix=amrashour3333/webscrap" style="color: #fff;text-decoration: underline;" target="_blank" rel="noopener noreferrer">Remix</a></p></body>
</html>