<!--
docusense-explorer / index.html
darkbreakerk's picture
Create svg html English presentation for this system
c602e04 verified
-->
<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="UTF-8">
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
  <title>DocuSense Explorer - Document Processing System</title>
  <link rel="stylesheet" href="style.css">
  <!-- Tailwind loaded from its CDN build; the utility classes used in <body> depend on it. -->
  <script src="https://cdn.tailwindcss.com"></script>
  <!--
    Feather icons: provides the global `feather` used by the inline script at the end of <body>.
    Loaded exactly once (it was previously fetched from two CDNs, which only redefined the same global).
    Deliberately not deferred: the inline script calls feather.replace() while the page is parsing.
  -->
  <script src="https://cdn.jsdelivr.net/npm/feather-icons/dist/feather.min.js"></script>
  <!-- NOTE(review): presumably these register <custom-navbar> and <custom-footer>; confirm against the component files. -->
  <script src="components/navbar.js"></script>
  <script src="components/footer.js"></script>
</head>
<body class="bg-gray-50 dark:bg-gray-900 min-h-screen flex flex-col">
<custom-navbar></custom-navbar>
<main class="flex-grow container mx-auto px-4 py-8">
<div class="max-w-5xl mx-auto">
<!-- Hero: headline, tagline and in-page links to the overview and demo sections -->
<section class="mb-16 text-center">
  <div class="bg-gradient-to-r from-blue-600 to-indigo-700 text-white p-8 rounded-xl shadow-2xl">
    <h1 class="text-4xl md:text-5xl font-bold mb-4">Document Processing <br>with Context Recovery</h1>
    <p class="text-xl mb-8 max-w-3xl mx-auto">Smart section-based processing with full context retrieval</p>
    <!-- Both links are same-page anchors (#overview, #demo) -->
    <div class="flex justify-center gap-4">
      <a class="bg-white text-blue-700 px-6 py-3 rounded-lg font-medium hover:bg-blue-50 transition" href="#overview">Learn More</a>
      <a class="border-2 border-white text-white px-6 py-3 rounded-lg font-medium hover:bg-white hover:text-blue-700 transition" href="#demo">See Demo</a>
    </div>
  </div>
</section>
<!-- System overview: architecture diagram followed by three feature cards -->
<section class="mb-16" id="overview">
  <h2 class="text-3xl font-bold mb-8 text-center dark:text-white">System Architecture</h2>
  <div class="relative">
    <!-- Architecture diagram, loaded from an external SVG file -->
    <div class="bg-white dark:bg-gray-800 p-6 rounded-xl shadow-lg">
      <img class="w-full h-auto rounded-lg" src="architecture.svg" alt="System Architecture Diagram">
    </div>
    <!--
      Key features. The icons are decorative (every card has a text heading), so they carry
      aria-hidden="true"; feather.replace() copies placeholder attributes onto the generated <svg>.
    -->
    <div class="grid md:grid-cols-2 lg:grid-cols-3 gap-6 mt-8">
      <div class="bg-white dark:bg-gray-800 p-6 rounded-xl shadow hover:shadow-lg transition">
        <div class="bg-blue-100 dark:bg-blue-900 w-12 h-12 rounded-full flex items-center justify-center mb-4">
          <i class="text-blue-600 dark:text-blue-300" data-feather="layers" aria-hidden="true"></i>
        </div>
        <h3 class="text-xl font-semibold mb-2 dark:text-white">Section-Based Processing</h3>
        <p class="text-gray-600 dark:text-gray-300">Documents are intelligently split into meaningful sections while preserving hierarchy.</p>
      </div>
      <div class="bg-white dark:bg-gray-800 p-6 rounded-xl shadow hover:shadow-lg transition">
        <div class="bg-purple-100 dark:bg-purple-900 w-12 h-12 rounded-full flex items-center justify-center mb-4">
          <i class="text-purple-600 dark:text-purple-300" data-feather="cpu" aria-hidden="true"></i>
        </div>
        <h3 class="text-xl font-semibold mb-2 dark:text-white">Context Recovery</h3>
        <p class="text-gray-600 dark:text-gray-300">Retrieve the full original section content even when searching small chunks.</p>
      </div>
      <div class="bg-white dark:bg-gray-800 p-6 rounded-xl shadow hover:shadow-lg transition">
        <div class="bg-green-100 dark:bg-green-900 w-12 h-12 rounded-full flex items-center justify-center mb-4">
          <i class="text-green-600 dark:text-green-300" data-feather="zap" aria-hidden="true"></i>
        </div>
        <h3 class="text-xl font-semibold mb-2 dark:text-white">Hybrid Search</h3>
        <p class="text-gray-600 dark:text-gray-300">Combines vector similarity and BM25 for precise and relevant results.</p>
      </div>
    </div>
  </div>
</section>
<!-- Workflow Demo -->
<section id="demo" class="mb-16">
<h2 class="text-3xl font-bold mb-8 text-center dark:text-white">Workflow Demonstration</h2>
<div class="bg-white dark:bg-gray-800 rounded-xl shadow-lg overflow-hidden">
<div class="grid md:grid-cols-2 gap-0">
<!--
  Step navigation. Each button's data-step matches the data-step-content of one panel;
  the inline script at the end of the page toggles the highlight classes and the panels.
  type="button" marks these as in-page actions so they can never act as submit buttons
  if this markup is ever placed inside a form.
-->
<div class="bg-gray-50 dark:bg-gray-700 p-6">
  <div class="sticky top-6">
    <h3 class="text-xl font-semibold mb-4 dark:text-white">Process Steps</h3>
    <div class="space-y-2">
      <button type="button" class="workflow-step active" data-step="1">1. Document Upload</button>
      <button type="button" class="workflow-step" data-step="2">2. Section Splitting</button>
      <button type="button" class="workflow-step" data-step="3">3. Chunk Generation</button>
      <button type="button" class="workflow-step" data-step="4">4. Storage</button>
      <button type="button" class="workflow-step" data-step="5">5. Search Process</button>
    </div>
  </div>
</div>
<!-- Step Content -->
<div class="p-6">
<!-- Step 1 panel: example upload request -->
<div class="workflow-content" data-step-content="1">
  <h3 class="text-xl font-semibold mb-4 dark:text-white">Document Upload</h3>
  <p class="mb-4 text-gray-600 dark:text-gray-300">Users upload documents through the API endpoint:</p>
  <!--
    Whitespace inside <pre> is rendered verbatim, so the tags sit tight against the sample
    (no stray blank first or last line) and the JSON lines start at column 0 with their own
    indentation. dark:text-gray-200 keeps the sample readable on the dark background.
  -->
  <pre class="bg-gray-100 dark:bg-gray-900 rounded-lg p-4 mb-4 overflow-x-auto"><code class="text-sm dark:text-gray-200">POST /index-file/
{
  "list_path": [
    {
      "id": "uuid-file-1",
      "path": "extracted/document.md"
    }
  ]
}</code></pre>
  <p class="text-gray-600 dark:text-gray-300">The system creates a unique job ID for processing and returns immediately.</p>
</div>
<!-- Step 2 panel: sample heading hierarchy; left margin (ml-4 / ml-8) shows nesting depth -->
<div class="workflow-content hidden" data-step-content="2">
  <h3 class="text-xl font-semibold mb-4 dark:text-white">Section Splitting</h3>
  <p class="mb-4 text-gray-600 dark:text-gray-300">Documents are split into hierarchical sections:</p>
  <!-- dark:text-gray-200 is set on the container so every outline row stays readable on the dark background -->
  <div class="bg-gray-100 dark:bg-gray-900 dark:text-gray-200 rounded-lg p-4 mb-4">
    <div class="flex items-center mb-2">
      <div class="w-2 h-2 bg-blue-500 rounded-full mr-2"></div>
      <span class="font-mono text-sm"># Main Title</span>
    </div>
    <div class="flex items-center mb-2 ml-4">
      <div class="w-2 h-2 bg-purple-500 rounded-full mr-2"></div>
      <span class="font-mono text-sm">## Section 1</span>
    </div>
    <div class="flex items-center mb-2 ml-8">
      <div class="w-2 h-2 bg-green-500 rounded-full mr-2"></div>
      <span class="font-mono text-sm">### Subsection 1.1</span>
    </div>
    <div class="flex items-center ml-4">
      <div class="w-2 h-2 bg-purple-500 rounded-full mr-2"></div>
      <span class="font-mono text-sm">## Section 2</span>
    </div>
  </div>
  <p class="text-gray-600 dark:text-gray-300">Each section maintains its page information and document context.</p>
</div>
<!-- Step 3 panel: one section label with its chunks on the left, explanation on the right -->
<div class="workflow-content hidden" data-step-content="3">
  <h3 class="text-xl font-semibold mb-4 dark:text-white">Chunk Generation</h3>
  <p class="mb-4 text-gray-600 dark:text-gray-300">Large sections are split into optimally-sized chunks:</p>
  <div class="flex items-start mb-4">
    <div class="border-r-2 border-gray-300 dark:border-gray-600 pr-4 mr-4">
      <!-- Each label gets a dark-mode text colour to match its dark-mode background -->
      <div class="bg-blue-100 dark:bg-blue-900 dark:text-blue-100 px-3 py-2 rounded-lg mb-2">
        <p class="text-sm">Section ID: 123e4567</p>
      </div>
      <div class="space-y-2">
        <div class="bg-gray-200 dark:bg-gray-700 dark:text-gray-200 px-3 py-2 rounded-lg">
          <p class="text-sm">Chunk 1 (Tokens: 512)</p>
        </div>
        <div class="bg-gray-200 dark:bg-gray-700 dark:text-gray-200 px-3 py-2 rounded-lg">
          <p class="text-sm">Chunk 2 (Tokens: 498)</p>
        </div>
      </div>
    </div>
    <div>
      <p class="text-gray-600 dark:text-gray-300 text-sm">Each chunk references its parent section while being small enough for efficient vector search.</p>
    </div>
  </div>
</div>
<!-- Step 4 panel: the two storage back ends and what each one holds -->
<div class="workflow-content hidden" data-step-content="4">
  <h3 class="text-xl font-semibold mb-4 dark:text-white">Storage</h3>
  <div class="grid grid-cols-1 md:grid-cols-2 gap-4 mb-4">
    <div class="bg-gray-100 dark:bg-gray-900 p-4 rounded-lg">
      <div class="flex items-center mb-2">
        <!-- Decorative icon: the heading next to it already names the store -->
        <i class="mr-2 text-blue-500" data-feather="database" aria-hidden="true"></i>
        <!-- dark:text-white matches the other headings on this page -->
        <h4 class="font-medium dark:text-white">PostgreSQL</h4>
      </div>
      <p class="text-sm text-gray-600 dark:text-gray-300">Stores complete section content with metadata</p>
    </div>
    <div class="bg-gray-100 dark:bg-gray-900 p-4 rounded-lg">
      <div class="flex items-center mb-2">
        <i class="mr-2 text-purple-500" data-feather="box" aria-hidden="true"></i>
        <h4 class="font-medium dark:text-white">Milvus</h4>
      </div>
      <p class="text-sm text-gray-600 dark:text-gray-300">Stores vector embeddings of chunks with section references</p>
    </div>
  </div>
</div>
<!-- Step 5 panel: the three search stages, each with a numbered badge, a title and a one-line description -->
<div class="workflow-content hidden" data-step-content="5">
  <h3 class="text-xl font-semibold mb-4 dark:text-white">Search Process</h3>
  <ol class="space-y-4">
    <!-- Stage 1 -->
    <li class="flex items-start">
      <div class="bg-blue-500 text-white rounded-full w-6 h-6 flex items-center justify-center mr-3 mt-0.5 flex-shrink-0">1</div>
      <div>
        <p class="font-medium dark:text-white">Query Processing</p>
        <p class="text-sm text-gray-600 dark:text-gray-300">Convert search query to embeddings and search Milvus</p>
      </div>
    </li>
    <!-- Stage 2 -->
    <li class="flex items-start">
      <div class="bg-blue-500 text-white rounded-full w-6 h-6 flex items-center justify-center mr-3 mt-0.5 flex-shrink-0">2</div>
      <div>
        <p class="font-medium dark:text-white">Result Aggregation</p>
        <p class="text-sm text-gray-600 dark:text-gray-300">Group chunks by their section_id and score</p>
      </div>
    </li>
    <!-- Stage 3 -->
    <li class="flex items-start">
      <div class="bg-blue-500 text-white rounded-full w-6 h-6 flex items-center justify-center mr-3 mt-0.5 flex-shrink-0">3</div>
      <div>
        <p class="font-medium dark:text-white">Context Retrieval</p>
        <p class="text-sm text-gray-600 dark:text-gray-300">Fetch full section content from PostgreSQL</p>
      </div>
    </li>
  </ol>
</div>
</div>
</div>
</div>
</section>
<!-- Benefits: three summary cards, told apart by the colour of their top border -->
<section class="mb-16">
  <h2 class="text-3xl font-bold mb-8 text-center dark:text-white">System Benefits</h2>
  <div class="grid md:grid-cols-3 gap-6">
    <!-- Blue card -->
    <div class="bg-white dark:bg-gray-800 p-6 rounded-xl shadow-lg border-t-4 border-blue-500">
      <h3 class="text-xl font-semibold mb-3 dark:text-white">Full Context Recovery</h3>
      <p class="text-gray-600 dark:text-gray-300">Retrieve complete section content even when searching small chunks, ensuring no context is lost.</p>
    </div>
    <!-- Purple card -->
    <div class="bg-white dark:bg-gray-800 p-6 rounded-xl shadow-lg border-t-4 border-purple-500">
      <h3 class="text-xl font-semibold mb-3 dark:text-white">Hybrid Search</h3>
      <p class="text-gray-600 dark:text-gray-300">Combine semantic vector search with keyword-based BM25 for more accurate results.</p>
    </div>
    <!-- Green card -->
    <div class="bg-white dark:bg-gray-800 p-6 rounded-xl shadow-lg border-t-4 border-green-500">
      <h3 class="text-xl font-semibold mb-3 dark:text-white">Optimal Performance</h3>
      <p class="text-gray-600 dark:text-gray-300">Fast vector search on small chunks with instant retrieval of full sections from PostgreSQL.</p>
    </div>
  </div>
</section>
</div>
</main>
<custom-footer></custom-footer>
<script>
  // Page bootstrap: renders the Feather icon placeholders and wires up the
  // "Process Steps" buttons so that exactly one workflow panel is visible.

  // Guarded: if the icon library failed to load, skip icon rendering instead of
  // throwing a ReferenceError that would stop the step navigation from binding.
  if (typeof feather !== 'undefined') {
    feather.replace();
  }

  document.addEventListener('DOMContentLoaded', function () {
    const steps = document.querySelectorAll('.workflow-step');
    const contents = document.querySelectorAll('.workflow-content');

    // Classes that mark the selected step button.
    const ACTIVE_CLASSES = ['active', 'bg-blue-500', 'text-white'];

    steps.forEach(function (step) {
      step.addEventListener('click', function () {
        // Clear the selection from every step, then mark the clicked one.
        steps.forEach(function (other) {
          other.classList.remove(...ACTIVE_CLASSES);
          other.removeAttribute('aria-current');
        });
        step.classList.add(...ACTIVE_CLASSES);
        // Expose the selection to assistive technology, not only through colour.
        step.setAttribute('aria-current', 'step');

        // Hide every panel, then reveal the one whose data-step-content matches.
        contents.forEach(function (panel) {
          panel.classList.add('hidden');
        });
        const stepNum = step.getAttribute('data-step');
        const target = document.querySelector(`[data-step-content="${stepNum}"]`);
        // A button without a matching panel leaves all panels hidden rather than throwing.
        if (target) {
          target.classList.remove('hidden');
        }
      });
    });

    // Select the first step by default; nothing to do if the page has no step buttons.
    if (steps.length > 0) {
      steps[0].click();
    }
  });
</script>
<!-- NOTE(review): third-party script served from huggingface.co; presumably injects the DeepSite badge. Confirm it is still wanted before shipping. -->
<script src="https://huggingface.co/deepsite/deepsite-badge.js"></script>
</body>
</html>