<!--
infi2colab's picture
Now, how can I connect it with my back end to give real results?
e6c19ce verified
-->
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>DocuSniff Pro - Smart Document Data Extraction</title>
<link rel="icon" type="image/x-icon" href="/static/favicon.ico">
<!-- Tailwind Play CDN: must load before the inline tailwind.config script below. -->
<script src="https://cdn.tailwindcss.com"></script>
<!-- Feather Icons, loaded once; the page script at the end of <body> calls feather.replace(). -->
<script src="https://cdn.jsdelivr.net/npm/feather-icons/dist/feather.min.js"></script>
<script>
  // Tailwind Play CDN configuration.
  //
  // The markup uses shade-suffixed utilities for the brand colors
  // (bg-primary-500, hover:bg-primary-600, bg-primary-100, text-primary-800,
  // bg-secondary-500, hover:bg-secondary-600, ...). A color declared as a single
  // string only yields the unsuffixed utility (bg-primary), so each brand color
  // is declared as a full shade scale. DEFAULT keeps the unsuffixed utilities
  // (bg-primary, text-secondary, ...) resolving to the original values.
  tailwind.config = {
    theme: {
      extend: {
        colors: {
          primary: {
            DEFAULT: '#6366f1',
            50: '#eef2ff',
            100: '#e0e7ff',
            200: '#c7d2fe',
            300: '#a5b4fc',
            400: '#818cf8',
            500: '#6366f1',
            600: '#4f46e5',
            700: '#4338ca',
            800: '#3730a3',
            900: '#312e81',
          },
          secondary: {
            DEFAULT: '#8b5cf6',
            50: '#f5f3ff',
            100: '#ede9fe',
            200: '#ddd6fe',
            300: '#c4b5fd',
            400: '#a78bfa',
            500: '#8b5cf6',
            600: '#7c3aed',
            700: '#6d28d9',
            800: '#5b21b6',
            900: '#4c1d95',
          },
        }
      }
    }
  };
</script>
<style>
  /* The upload target and the preview panel share the same easing. */
  .dropzone,
  #previewContainer {
    transition: all 0.3s ease;
  }

  /* Upload target: dashed outline at rest. */
  .dropzone {
    border: 2px dashed #c7d2fe;
  }

  /* Upload target while the "active" class is set by the page script. */
  .dropzone.active {
    border-color: #6366f1;
    background-color: #eef2ff;
  }

  /* Result cards lift slightly and gain a shadow on hover. */
  .entity-card {
    transition: all 0.2s ease;
  }

  .entity-card:hover {
    transform: translateY(-2px);
    box-shadow: 0 10px 15px -3px rgba(0, 0, 0, 0.1), 0 4px 6px -2px rgba(0, 0, 0, 0.05);
  }
</style>
</head>
<body class="bg-gray-50 min-h-screen">
<div class="max-w-7xl mx-auto px-4 sm:px-6 lg:px-8 py-12">
<!-- Header -->
<header class="text-center mb-12">
<!-- Product name and one-line tagline -->
<h1 class="text-4xl font-bold text-gray-900 mb-2">DocuSniff Pro</h1>
<p class="text-lg text-gray-600">Upload documents and extract valuable business & personal information instantly</p>
<!-- Feature badge; the <i data-feather> placeholder is rendered by the feather.replace() call in the page script -->
<div class="flex justify-center mt-4">
<span class="inline-flex items-center px-3 py-1 rounded-full bg-primary-100 text-primary-800 text-sm font-medium">
<i data-feather="zap" class="w-4 h-4 mr-1"></i> Smart Data Extraction
</span>
</div>
</header>
<!-- Main Content -->
<div class="grid grid-cols-1 lg:grid-cols-3 gap-8">
<!-- Left Panel - Upload & Options -->
<div class="lg:col-span-1 space-y-6">
<!-- Document Upload -->
<div class="bg-white rounded-xl shadow-md p-6">
<!-- Card heading -->
<h2 class="text-xl font-semibold text-gray-800 mb-4 flex items-center">
<i data-feather="upload" class="w-5 h-5 mr-2 text-primary-500"></i> Upload Documents
</h2>
<!-- Drop target: the page script adds/removes the "active" class on dragover, dragleave and drop -->
<div id="dropzone" class="dropzone rounded-lg p-8 text-center cursor-pointer">
<i data-feather="file-plus" class="w-12 h-12 mx-auto text-gray-400 mb-3"></i>
<p class="text-gray-600 mb-2">Drag & drop files here</p>
<p class="text-sm text-gray-500 mb-4">or</p>
<!-- Hidden native file picker; the page script opens it when "Select Files" is clicked -->
<input type="file" id="fileInput" class="hidden" accept=".pdf,.msg,.txt,.xlsx,.xls,.docx,.doc" multiple>
<button id="selectFilesBtn" class="bg-primary-500 hover:bg-primary-600 text-white px-4 py-2 rounded-md transition duration-200">
Select Files
</button>
<p class="text-xs text-gray-500 mt-3">Supports: PDF, MSG, TXT, Excel, Word</p>
</div>
<!-- Selected-file list: starts hidden; the page script un-hides it and renders one row per file into #selectedFiles -->
<div id="fileList" class="mt-4 space-y-2 hidden">
<h3 class="text-sm font-medium text-gray-700">Selected Files:</h3>
<div id="selectedFiles" class="space-y-1"></div>
</div>
</div>
<!-- Extraction Options -->
<div class="bg-white rounded-xl shadow-md p-6">
  <h2 class="text-xl font-semibold text-gray-800 mb-4 flex items-center">
    <i data-feather="settings" class="w-5 h-5 mr-2 text-primary-500"></i> Extraction Options
  </h2>
  <!-- Preset Entities -->
  <!--
    The page script collects every checked checkbox and sends the text of the
    <span> that immediately follows it, so each <span> must stay the checkbox's
    next sibling. The group is labelled by its heading for assistive technology.
  -->
  <div class="mb-6" role="group" aria-labelledby="entityOptionsLabel">
    <h3 id="entityOptionsLabel" class="text-sm font-medium text-gray-700 mb-2">Extract Entities:</h3>
    <div class="grid grid-cols-2 gap-2">
      <label class="flex items-center space-x-2">
        <input type="checkbox" class="rounded text-primary-500" checked>
        <span class="text-gray-800 text-sm">People</span>
      </label>
      <label class="flex items-center space-x-2">
        <input type="checkbox" class="rounded text-primary-500" checked>
        <span class="text-gray-800 text-sm">Companies</span>
      </label>
      <label class="flex items-center space-x-2">
        <input type="checkbox" class="rounded text-primary-500" checked>
        <span class="text-gray-800 text-sm">Emails</span>
      </label>
      <label class="flex items-center space-x-2">
        <input type="checkbox" class="rounded text-primary-500" checked>
        <span class="text-gray-800 text-sm">Phone Numbers</span>
      </label>
      <label class="flex items-center space-x-2">
        <input type="checkbox" class="rounded text-primary-500">
        <span class="text-gray-800 text-sm">Addresses</span>
      </label>
      <label class="flex items-center space-x-2">
        <input type="checkbox" class="rounded text-primary-500">
        <span class="text-gray-800 text-sm">Dates</span>
      </label>
      <label class="flex items-center space-x-2">
        <input type="checkbox" class="rounded text-primary-500">
        <span class="text-gray-800 text-sm">Job Titles</span>
      </label>
      <label class="flex items-center space-x-2">
        <input type="checkbox" class="rounded text-primary-500">
        <span class="text-gray-800 text-sm">Financial Data</span>
      </label>
    </div>
  </div>
  <!-- Custom Keywords -->
  <div>
    <h3 id="customKeywordLabel" class="text-sm font-medium text-gray-700 mb-2">Custom Keywords:</h3>
    <div class="flex">
      <!--
        The heading above is the input's accessible name (a placeholder alone is
        not a label). "border px-3 py-2" gives the field a visible outline and
        inner spacing: border-gray-300 only sets a color, not a width.
      -->
      <input type="text" id="customKeyword" class="flex-1 border px-3 py-2 rounded-l-md border-gray-300 shadow-sm focus:border-primary-500 focus:ring-primary-500" placeholder="Add keywords to extract" aria-labelledby="customKeywordLabel">
      <button type="button" id="addKeywordBtn" class="bg-primary-500 hover:bg-primary-600 text-white px-4 py-2 rounded-r-md transition duration-200">
        Add
      </button>
    </div>
    <div id="keywordsList" class="flex flex-wrap gap-2 mt-2">
      <!-- Keywords will be added here -->
    </div>
  </div>
</div>
<!-- Action Buttons -->
<div class="space-y-3">
<!-- Starts disabled; the page script enables it once at least one file is selected and swaps its label while a request is running -->
<button id="extractBtn" class="w-full bg-primary-500 hover:bg-primary-600 text-white font-medium py-3 px-4 rounded-md transition duration-200 flex items-center justify-center disabled:opacity-50" disabled>
<i data-feather="search" class="w-5 h-5 mr-2"></i> Extract Information
</button>
<!-- Wired to the clearAll handler in the page script (resets files, keywords and results) -->
<button id="clearBtn" class="w-full bg-gray-200 hover:bg-gray-300 text-gray-800 font-medium py-3 px-4 rounded-md transition duration-200 flex items-center justify-center">
<i data-feather="trash-2" class="w-5 h-5 mr-2"></i> Clear All
</button>
</div>
</div>
<!-- Right Panel - Results -->
<div class="lg:col-span-2">
<div class="bg-white rounded-xl shadow-md p-6">
  <h2 class="text-xl font-semibold text-gray-800 mb-6 flex items-center">
    <i data-feather="file-text" class="w-5 h-5 mr-2 text-primary-500"></i> Extracted Information
  </h2>
  <!-- Placeholder shown until an extraction has produced results -->
  <div id="emptyState" class="text-center py-12">
    <i data-feather="file-text" class="w-12 h-12 mx-auto text-gray-300 mb-4"></i>
    <h3 class="text-lg font-medium text-gray-900 mb-1">No data extracted yet</h3>
    <p class="text-gray-500">Upload documents and click "Extract Information" to get started</p>
  </div>
  <div id="resultsContainer" class="hidden">
    <!-- Document Preview Section -->
    <div id="previewContainer" class="mb-8">
      <div class="flex items-center justify-between mb-4">
        <h3 class="text-lg font-medium text-gray-800">Document Preview</h3>
        <div class="flex space-x-2">
          <!-- Icon-only buttons carry an aria-label so they have an accessible name -->
          <button type="button" id="prevDocBtn" class="text-gray-500 hover:text-primary-500 disabled:text-gray-300" aria-label="Previous document" disabled>
            <i data-feather="chevron-left" class="w-5 h-5" aria-hidden="true"></i>
          </button>
          <span id="docCounter" class="text-sm text-gray-500">1/1</span>
          <button type="button" id="nextDocBtn" class="text-gray-500 hover:text-primary-500 disabled:text-gray-300" aria-label="Next document" disabled>
            <i data-feather="chevron-right" class="w-5 h-5" aria-hidden="true"></i>
          </button>
        </div>
      </div>
      <!--
        Document preview text is written here by the page script via textContent;
        whitespace-pre-wrap keeps the document's own line breaks visible.
      -->
      <div id="documentPreview" class="border rounded-lg p-4 h-64 overflow-y-auto bg-gray-50 whitespace-pre-wrap"></div>
    </div>
    <!-- Extracted Entities -->
    <div>
      <div class="flex items-center justify-between mb-4">
        <h3 class="text-lg font-medium text-gray-800">Extracted Entities</h3>
        <div class="flex items-center space-x-2">
          <button type="button" id="exportBtn" class="text-sm bg-secondary-500 hover:bg-secondary-600 text-white px-3 py-1 rounded-md transition duration-200 flex items-center">
            <i data-feather="download" class="w-4 h-4 mr-1"></i> Export
          </button>
        </div>
      </div>
      <!-- Entity Categories: each section starts hidden and is revealed by the page script when it has items -->
      <div class="space-y-6">
        <!-- People -->
        <div id="peopleSection" class="hidden">
          <div class="flex items-center mb-3">
            <i data-feather="users" class="w-5 h-5 text-primary-500 mr-2"></i>
            <h4 class="font-medium text-gray-800">People</h4>
            <span id="peopleCount" class="ml-2 text-xs bg-primary-100 text-primary-800 px-2 py-1 rounded-full"></span>
          </div>
          <div id="peopleList" class="grid grid-cols-1 md:grid-cols-2 gap-3">
            <!-- People cards will be added here -->
          </div>
        </div>
        <!-- Companies -->
        <div id="companiesSection" class="hidden">
          <div class="flex items-center mb-3">
            <i data-feather="briefcase" class="w-5 h-5 text-primary-500 mr-2"></i>
            <h4 class="font-medium text-gray-800">Companies</h4>
            <span id="companiesCount" class="ml-2 text-xs bg-primary-100 text-primary-800 px-2 py-1 rounded-full"></span>
          </div>
          <div id="companiesList" class="grid grid-cols-1 md:grid-cols-2 gap-3">
            <!-- Company cards will be added here -->
          </div>
        </div>
        <!-- Contact Info -->
        <div id="contactSection" class="hidden">
          <div class="flex items-center mb-3">
            <i data-feather="mail" class="w-5 h-5 text-primary-500 mr-2"></i>
            <h4 class="font-medium text-gray-800">Contact Information</h4>
            <span id="contactCount" class="ml-2 text-xs bg-primary-100 text-primary-800 px-2 py-1 rounded-full"></span>
          </div>
          <div id="contactList" class="grid grid-cols-1 md:grid-cols-2 gap-3">
            <!-- Contact cards will be added here -->
          </div>
        </div>
        <!-- Custom Keywords -->
        <div id="keywordsSection" class="hidden">
          <div class="flex items-center mb-3">
            <i data-feather="tag" class="w-5 h-5 text-primary-500 mr-2"></i>
            <h4 class="font-medium text-gray-800">Custom Keywords</h4>
            <span id="keywordsCount" class="ml-2 text-xs bg-primary-100 text-primary-800 px-2 py-1 rounded-full"></span>
          </div>
          <div id="keywordsResults" class="grid grid-cols-1 md:grid-cols-2 gap-3">
            <!-- Keyword results will be added here -->
          </div>
        </div>
      </div>
    </div>
  </div>
</div>
</div>
</div>
</div>
<script>
document.addEventListener('DOMContentLoaded', function() {
// Render every <i data-feather="..."> placeholder present in the static markup.
feather.replace();

// DOM Elements
const dropzone = document.getElementById('dropzone');
const fileInput = document.getElementById('fileInput');
const selectFilesBtn = document.getElementById('selectFilesBtn');
const fileList = document.getElementById('fileList');
const selectedFiles = document.getElementById('selectedFiles');
const extractBtn = document.getElementById('extractBtn');
const clearBtn = document.getElementById('clearBtn');
const addKeywordBtn = document.getElementById('addKeywordBtn');
const customKeyword = document.getElementById('customKeyword');
const keywordsList = document.getElementById('keywordsList');
const emptyState = document.getElementById('emptyState');
const resultsContainer = document.getElementById('resultsContainer');
const documentPreview = document.getElementById('documentPreview');
const prevDocBtn = document.getElementById('prevDocBtn');
const nextDocBtn = document.getElementById('nextDocBtn');
const docCounter = document.getElementById('docCounter');
const exportBtn = document.getElementById('exportBtn');

// State
let files = [];              // File objects queued for upload
let currentDocIndex = 0;     // index into extractionResults of the document on screen
let extractionResults = [];  // parsed JSON returned by the extraction endpoint
let customKeywords = [];     // user-entered keywords sent with the request

// Event Listeners
selectFilesBtn.addEventListener('click', () => fileInput.click());
fileInput.addEventListener('change', handleFileSelect);

dropzone.addEventListener('dragover', (e) => {
  // preventDefault is required for the element to be accepted as a drop target.
  e.preventDefault();
  dropzone.classList.add('active');
});
dropzone.addEventListener('dragleave', () => {
  dropzone.classList.remove('active');
});
dropzone.addEventListener('drop', (e) => {
  e.preventDefault();
  dropzone.classList.remove('active');
  const dropped = e.dataTransfer ? e.dataTransfer.files : null;
  // Dragging text or a link onto the zone yields no files; ignore such drops
  // so they do not wipe the current selection.
  if (!dropped || dropped.length === 0) return;
  fileInput.files = dropped;
  handleFileSelect({ target: fileInput });
});

extractBtn.addEventListener('click', extractInformation);
clearBtn.addEventListener('click', clearAll);
addKeywordBtn.addEventListener('click', addCustomKeyword);
// 'keydown' replaces the deprecated 'keypress' event. isComposing is checked so
// that an Enter used to confirm IME input does not also submit the keyword.
customKeyword.addEventListener('keydown', (e) => {
  if (e.key === 'Enter' && !e.isComposing) addCustomKeyword();
});
prevDocBtn.addEventListener('click', () => navigateDocuments(-1));
nextDocBtn.addEventListener('click', () => navigateDocuments(1));
exportBtn.addEventListener('click', exportResults);
// Functions
/**
 * Replaces the pending selection with the files held by the event target,
 * re-renders the file list and enables the extract button only when the
 * selection is non-empty.
 *
 * @param {{target: {files: ArrayLike<File>}}} e - change event, or an object shaped like one.
 */
function handleFileSelect(e) {
  const { files: chosen } = e.target;
  files = Array.from(chosen);
  updateFileList();
  const nothingSelected = files.length === 0;
  extractBtn.disabled = nothingSelected;
}
/**
 * Rebuilds the "Selected Files" list from the current `files` array.
 * The wrapper is hidden when there is nothing to show; otherwise one row
 * (icon, file name, remove button) is rendered per file.
 */
function updateFileList() {
  selectedFiles.innerHTML = '';

  const hasFiles = files.length !== 0;
  if (!hasFiles) {
    fileList.classList.add('hidden');
    return;
  }
  fileList.classList.remove('hidden');

  for (const [position, entry] of files.entries()) {
    // Left side: type icon followed by the file name.
    const typeIcon = document.createElement('i');
    typeIcon.setAttribute('data-feather', getFileIcon(entry.name));
    typeIcon.className = 'w-4 h-4 mr-2 text-gray-500';

    const label = document.createElement('span');
    label.className = 'text-sm text-gray-700 truncate max-w-xs';
    label.textContent = entry.name;

    const left = document.createElement('div');
    left.className = 'flex items-center';
    left.append(typeIcon, label);

    // Right side: remove button bound to this row's position in `files`.
    const discard = document.createElement('button');
    discard.className = 'text-gray-400 hover:text-red-500';
    discard.innerHTML = '<i data-feather="x" class="w-4 h-4"></i>';
    discard.addEventListener('click', () => removeFile(position));

    const row = document.createElement('div');
    row.className = 'flex items-center justify-between p-2 bg-gray-50 rounded-lg';
    row.append(left, discard);

    selectedFiles.appendChild(row);
  }

  // Convert the freshly inserted data-feather placeholders.
  feather.replace();
}
/**
 * Maps a file name to the Feather icon name used for it in the file list.
 * The extension is whatever follows the last dot, compared case-insensitively;
 * unknown extensions (and names without a dot) fall back to 'file'.
 *
 * @param {string} filename
 * @returns {string} Feather icon name.
 */
function getFileIcon(filename) {
  const iconByExtension = new Map([
    ['pdf', 'file-text'],
    ['msg', 'mail'],
    ['txt', 'file'],
    ['xlsx', 'file-text'],
    ['xls', 'file-text'],
    ['docx', 'file-text'],
    ['doc', 'file-text'],
  ]);
  // lastIndexOf yields -1 when there is no dot, so slice(0) keeps the whole name.
  const extension = filename.slice(filename.lastIndexOf('.') + 1).toLowerCase();
  return iconByExtension.has(extension) ? iconByExtension.get(extension) : 'file';
}
/**
 * Drops one file from the pending selection, re-renders the list and
 * disables the extract button when nothing is left.
 *
 * @param {number} index - position of the file inside `files`.
 */
function removeFile(index) {
  files.splice(index, 1);
  updateFileList();
  const remaining = files.length;
  extractBtn.disabled = !remaining;
}
/**
 * Reads the keyword input, and when it holds a new non-blank keyword, records
 * it in `customKeywords` and renders a removable chip for it.
 * Blank input and exact duplicates are ignored, leaving the input untouched.
 */
function addCustomKeyword() {
  const keyword = customKeyword.value.trim();
  const alreadyAdded = customKeywords.indexOf(keyword) !== -1;
  if (!keyword || alreadyAdded) {
    return;
  }
  customKeywords.push(keyword);

  const label = document.createElement('span');
  label.className = 'mr-2';
  label.textContent = keyword;

  const chip = document.createElement('div');
  chip.className = 'flex items-center bg-gray-100 px-3 py-1 rounded-full text-sm';

  const dismiss = document.createElement('button');
  dismiss.className = 'text-gray-500 hover:text-red-500';
  dismiss.innerHTML = '<i data-feather="x" class="w-3 h-3"></i>';
  dismiss.addEventListener('click', () => {
    // Remove both the stored keyword and its chip.
    customKeywords = customKeywords.filter(k => k !== keyword);
    keywordsList.removeChild(chip);
  });

  chip.appendChild(label);
  chip.appendChild(dismiss);
  keywordsList.appendChild(chip);

  customKeyword.value = '';
  feather.replace();
}
/**
 * Uploads the selected files and extraction options to the backend, then
 * renders the returned documents.
 *
 * Request: multipart POST to /api/extract with
 *   - documents: one part per selected file
 *   - entities:  JSON array with the label text of every checked checkbox
 *   - keywords:  JSON array of custom keywords (sent only when any exist)
 *
 * Response: must be a JSON array with one element per document; each element
 * is rendered through updateDocumentView().
 *
 * The results panel is revealed only after a successful, non-empty response,
 * so a failed request never leaves an empty results panel on screen. Results
 * from an earlier successful run are left untouched when a later run fails.
 */
async function extractInformation() {
  // Nothing to upload (the button is normally disabled in this state).
  if (files.length === 0) return;

  extractBtn.disabled = true;
  extractBtn.innerHTML = '<i data-feather="loader" class="w-5 h-5 mr-2 animate-spin"></i> Processing...';
  feather.replace();
  try {
    // Create FormData to send files
    const formData = new FormData();
    files.forEach(file => formData.append('documents', file));
    // Add extraction options: the label text sits in the <span> right after each checkbox.
    const checkboxes = document.querySelectorAll('input[type="checkbox"]:checked');
    const entitiesToExtract = Array.from(checkboxes).map(cb => cb.nextElementSibling.textContent.trim());
    formData.append('entities', JSON.stringify(entitiesToExtract));
    // Add custom keywords if any
    if (customKeywords.length > 0) {
      formData.append('keywords', JSON.stringify(customKeywords));
    }
    // Call your backend API (replace with your actual endpoint)
    const response = await fetch('/api/extract', {
      method: 'POST',
      body: formData
    });
    if (!response.ok) {
      throw new Error(`Error: ${response.status}`);
    }
    const payload = await response.json();
    // Validate before overwriting state so a malformed reply cannot corrupt
    // the results that are currently displayed.
    if (!Array.isArray(payload)) {
      throw new Error('Unexpected response: expected a JSON array of documents');
    }
    extractionResults = payload;
    currentDocIndex = 0;
    updateDocumentView();
    // Swap the placeholder for the results only once there is something to show.
    const hasResults = extractionResults.length > 0;
    emptyState.classList.toggle('hidden', hasResults);
    resultsContainer.classList.toggle('hidden', !hasResults);
  } catch (error) {
    console.error('Extraction failed:', error);
    alert('Extraction failed. Please try again.');
  } finally {
    // The selection may have been cleared while the request was running.
    extractBtn.disabled = files.length === 0;
    extractBtn.innerHTML = '<i data-feather="search" class="w-5 h-5 mr-2"></i> Extract Information';
    feather.replace();
  }
}
/**
 * Chooses the text shown in the preview pane.
 *
 * @param {*} content - preview value taken from a result document.
 * @returns {string} `content` itself when it is a string, otherwise a fixed
 *   "preview not available" notice.
 */
function generateDocumentPreview(content) {
  const fallbackNotice = `Preview not available for this document. Extracted entities are shown below.`;
  return typeof content === 'string' ? content : fallbackNotice;
}
/**
 * Renders the document at `currentDocIndex`: preview text, the "n/total"
 * counter, the prev/next button state and the entity sections.
 *
 * This is the single definition of the function. When a function is declared
 * twice in the same scope the later declaration wins, which would discard the
 * fallbacks used here (preview text and default entity lists).
 *
 * Does nothing when there are no results or the index points at no document.
 */
function updateDocumentView() {
  if (extractionResults.length === 0) return;
  const currentDoc = extractionResults[currentDocIndex];
  if (!currentDoc) return;

  // Update document preview: use `preview`, else `content`, else the fixed notice.
  documentPreview.textContent = generateDocumentPreview(currentDoc.preview || currentDoc.content);

  // Update document counter
  docCounter.textContent = `${currentDocIndex + 1}/${extractionResults.length}`;

  // Update navigation buttons
  prevDocBtn.disabled = currentDocIndex === 0;
  nextDocBtn.disabled = currentDocIndex === extractionResults.length - 1;

  // Update extracted entities. Every list the renderer reads is defaulted to
  // an empty array so a document that omits a category (or `entities`
  // altogether) hides that section instead of throwing.
  const rawEntities = currentDoc.entities || {};
  const asList = (value) => (Array.isArray(value) ? value : []);
  updateEntitiesDisplay(Object.assign({}, rawEntities, {
    people: asList(rawEntities.people),
    companies: asList(rawEntities.companies),
    emails: asList(rawEntities.emails),
    phones: asList(rawEntities.phones),
    custom: asList(rawEntities.custom),
  }));
}
/**
 * Renders the entity sections (People, Companies, Contact Information,
 * Custom Keywords) for one document.
 *
 * @param {Object} [entities] - lists read from this object:
 *   people / companies (items rendered via `name` and `context`),
 *   emails / phones (items rendered via `value` and `context`, shown together
 *   in the contact section, emails first),
 *   custom (items rendered via `keyword` and `context`).
 *   A missing or non-array list is treated as empty.
 *
 * A section with no items is hidden; otherwise its list is rebuilt and its
 * counter badge is set to the number of items.
 */
function updateEntitiesDisplay(entities) {
  const source = entities || {};
  const listOf = (key) => (Array.isArray(source[key]) ? source[key] : []);

  // Shows the section with the given cards, or hides it when there are none.
  function renderSection(sectionId, listId, countId, cards) {
    const section = document.getElementById(sectionId);
    const list = document.getElementById(listId);
    const count = document.getElementById(countId);
    if (cards.length === 0) {
      section.classList.add('hidden');
      return;
    }
    section.classList.remove('hidden');
    list.innerHTML = '';
    cards.forEach(card => list.appendChild(card));
    count.textContent = cards.length;
  }

  // People
  renderSection('peopleSection', 'peopleList', 'peopleCount',
    listOf('people').map(person => createEntityCard('user', person.name, person.context)));

  // Companies
  renderSection('companiesSection', 'companiesList', 'companiesCount',
    listOf('companies').map(company => createEntityCard('briefcase', company.name, company.context)));

  // Contact Info (emails + phones)
  const emailCards = listOf('emails').map(email => createEntityCard('mail', email.value, email.context));
  const phoneCards = listOf('phones').map(phone => createEntityCard('phone', phone.value, phone.context));
  renderSection('contactSection', 'contactList', 'contactCount', emailCards.concat(phoneCards));

  // Custom Keywords
  renderSection('keywordsSection', 'keywordsResults', 'keywordsCount',
    listOf('custom').map(item => createEntityCard('tag', item.keyword, item.context)));

  // Render the icon placeholders now that every card is attached to the page;
  // a card that is still detached when feather.replace() runs keeps its
  // unrendered <i data-feather> placeholder.
  feather.replace();
}
/**
 * Builds one result card: a Feather icon placeholder, a title and a
 * description. Both texts are assigned through textContent.
 * The card is returned detached; the caller inserts it into the page.
 *
 * @param {string} icon - Feather icon name.
 * @param {string} title - primary text of the card.
 * @param {string} description - secondary text of the card.
 * @returns {HTMLDivElement} the card element.
 */
function createEntityCard(icon, title, description) {
  const glyph = document.createElement('i');
  glyph.className = 'w-5 h-5 text-primary-500 mb-2';
  glyph.setAttribute('data-feather', icon);

  const heading = document.createElement('h5');
  heading.className = 'font-medium text-gray-800 mb-1 truncate';
  heading.textContent = title;

  const body = document.createElement('p');
  body.className = 'text-sm text-gray-600 line-clamp-2';
  body.textContent = description;

  const card = document.createElement('div');
  card.className = 'entity-card bg-white border border-gray-200 rounded-lg p-4 shadow-sm';
  card.append(glyph, heading, body);

  // NOTE(review): the card is not attached to the document at this point, so
  // this call presumably only converts placeholders of cards attached earlier — confirm.
  feather.replace();
  return card;
}
/**
 * Moves the document cursor by `direction` and re-renders the view.
 * The index is clamped to [0, extractionResults.length - 1].
 *
 * @param {number} direction - signed step, e.g. -1 for previous, 1 for next.
 */
function navigateDocuments(direction) {
  const lastIndex = extractionResults.length - 1;
  // With no results there is nothing to show; keep the cursor at 0 rather
  // than letting the upper clamp push it to -1.
  if (lastIndex < 0) {
    currentDocIndex = 0;
    return;
  }
  currentDocIndex = Math.min(Math.max(currentDocIndex + direction, 0), lastIndex);
  updateDocumentView();
}
/**
 * Resets the page to its initial state: drops the selected files, custom
 * keywords and extraction results, and restores the empty-state placeholder.
 *
 * The entity sections, document counter and navigation buttons are reset as
 * well, so nothing from a previous run is still present the next time the
 * results panel is shown.
 */
function clearAll() {
  // State
  files = [];
  customKeywords = [];
  extractionResults = [];
  currentDocIndex = 0;

  // Upload panel
  fileInput.value = '';
  selectedFiles.innerHTML = '';
  fileList.classList.add('hidden');
  extractBtn.disabled = true;
  keywordsList.innerHTML = '';

  // Results panel
  documentPreview.textContent = '';
  docCounter.textContent = '1/1';
  prevDocBtn.disabled = true;
  nextDocBtn.disabled = true;
  ['peopleSection', 'companiesSection', 'contactSection', 'keywordsSection'].forEach(id => {
    const section = document.getElementById(id);
    if (section) section.classList.add('hidden');
  });
  emptyState.classList.remove('hidden');
  resultsContainer.classList.add('hidden');
}
/**
 * Sends the current extraction results to /api/export and downloads the
 * returned file as extracted_data_<YYYY-MM-DD>.<format>.
 *
 * Request body (JSON): { results: extractionResults, format }.
 * Does nothing except notify the user when there are no results to export.
 */
async function exportResults() {
  if (extractionResults.length === 0) {
    alert('There is no extracted data to export yet.');
    return;
  }
  // Single source for both the requested format and the file extension, so
  // the two cannot drift apart.
  const format = 'csv'; // or 'json' based on user selection
  try {
    const response = await fetch('/api/export', {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
      },
      body: JSON.stringify({
        results: extractionResults,
        format: format
      })
    });
    if (!response.ok) {
      throw new Error(`Export failed: ${response.status}`);
    }
    const blob = await response.blob();
    const url = window.URL.createObjectURL(blob);
    // A temporary link is the portable way to trigger a named download.
    const a = document.createElement('a');
    a.href = url;
    a.download = `extracted_data_${new Date().toISOString().slice(0,10)}.${format}`;
    document.body.appendChild(a);
    a.click();
    window.URL.revokeObjectURL(url);
    a.remove();
  } catch (error) {
    console.error('Export failed:', error);
    alert('Export failed. Please try again.');
  }
}
});
</script>
</body>
</html>