docling-rag-pipeline-wizard / configure.html
gmossy's picture
i need a docling rag ingestion pipeline
0fd8c45 verified
<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="UTF-8">
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
  <title>Configure Pipeline | DoclingRag</title>
  <link rel="stylesheet" href="style.css">
  <!-- Tailwind loaded from its CDN build; supplies the utility classes used throughout the page. -->
  <script src="https://cdn.tailwindcss.com"></script>
  <!--
    Feather Icons: defines the global `feather` object that the inline script
    at the end of <body> calls via feather.replace(). Load exactly one copy;
    a second CDN copy only costs an extra download and re-defines the same global.
  -->
  <script src="https://cdn.jsdelivr.net/npm/feather-icons/dist/feather.min.js"></script>
</head>
<body class="bg-gray-900 text-white min-h-screen flex flex-col">
<custom-navbar></custom-navbar>
<main class="flex-grow container mx-auto px-4 py-8">
<div class="max-w-5xl mx-auto bg-gray-800 bg-opacity-70 backdrop-blur-md rounded-xl p-8 shadow-2xl">
<!-- Page title and one-line tagline shown above the configuration grid. -->
<div class="mb-8">
  <h1 class="text-3xl md:text-4xl font-bold mb-2">Pipeline Configuration</h1>
  <p class="text-gray-400">Customize your document processing workflow</p>
</div>
<div class="grid md:grid-cols-3 gap-6">
<div class="md:col-span-2">
<div class="bg-gray-700 bg-opacity-50 rounded-lg p-6 mb-6">
<h2 class="text-xl font-semibold mb-4 flex items-center">
<i data-feather="layers" class="w-5 h-5 mr-2"></i>
Processing Steps
</h2>
<div class="space-y-4">
<!--
  Pipeline step: OCR Processing.
  The on/off switch is a visually hidden checkbox (sr-only); the sibling <span>
  draws the track and knob and reacts to the checkbox state via Tailwind's
  `peer-*` variants.
-->
<div class="flex items-start bg-gray-600 bg-opacity-50 p-4 rounded-lg">
  <div class="flex-shrink-0 mt-1 mr-3">
    <div class="h-4 w-4 rounded-full bg-indigo-500"></div>
  </div>
  <div class="flex-grow">
    <div class="flex justify-between items-center">
      <h3 class="font-medium" id="step-ocr-title">OCR Processing</h3>
      <label class="relative inline-flex items-center cursor-pointer">
        <!-- The label carries no text, so the checkbox takes its accessible name from the heading and its description from the hint paragraph. -->
        <input type="checkbox" checked class="sr-only peer" id="step-ocr-toggle" aria-labelledby="step-ocr-title" aria-describedby="step-ocr-hint">
        <!-- <span> rather than <div>: <label> may only contain phrasing content. As a flex item it is still laid out as a block. The ring gives keyboard users a visible focus indicator for the hidden checkbox. -->
        <span class="w-11 h-6 bg-gray-700 peer-focus:outline-none peer-focus-visible:ring-2 peer-focus-visible:ring-indigo-300 rounded-full peer peer-checked:after:translate-x-full peer-checked:after:border-white after:content-[''] after:absolute after:top-[2px] after:left-[2px] after:bg-white after:border-gray-300 after:border after:rounded-full after:h-5 after:w-5 after:transition-all peer-checked:bg-indigo-600"></span>
      </label>
    </div>
    <p class="text-sm text-gray-400 mt-1" id="step-ocr-hint">Enable optical character recognition for image-based documents</p>
  </div>
</div>
<!--
  Pipeline step: Text Extraction.
  Contains the on/off switch (visually hidden checkbox + styled <span>) and a
  radio group that selects the extraction mode.
-->
<div class="flex items-start bg-gray-600 bg-opacity-50 p-4 rounded-lg">
  <div class="flex-shrink-0 mt-1 mr-3">
    <div class="h-4 w-4 rounded-full bg-purple-500"></div>
  </div>
  <div class="flex-grow">
    <div class="flex justify-between items-center">
      <h3 class="font-medium" id="step-extract-title">Text Extraction</h3>
      <label class="relative inline-flex items-center cursor-pointer">
        <!-- The label carries no text, so the checkbox takes its accessible name from the heading and its description from the hint paragraph. -->
        <input type="checkbox" checked class="sr-only peer" id="step-extract-toggle" aria-labelledby="step-extract-title" aria-describedby="step-extract-hint">
        <!-- <span> rather than <div>: <label> may only contain phrasing content. The ring gives keyboard users a visible focus indicator for the hidden checkbox. -->
        <span class="w-11 h-6 bg-gray-700 peer-focus:outline-none peer-focus-visible:ring-2 peer-focus-visible:ring-purple-300 rounded-full peer peer-checked:after:translate-x-full peer-checked:after:border-white after:content-[''] after:absolute after:top-[2px] after:left-[2px] after:bg-white after:border-gray-300 after:border after:rounded-full after:h-5 after:w-5 after:transition-all peer-checked:bg-purple-600"></span>
      </label>
    </div>
    <p class="text-sm text-gray-400 mt-1" id="step-extract-hint">Extract and clean text from documents</p>
    <!-- Radios are grouped in a fieldset so assistive tech announces what the options belong to; the legend is visually hidden to keep the layout unchanged. -->
    <fieldset class="mt-2 min-w-0">
      <legend class="sr-only">Extraction mode</legend>
      <div class="space-y-2">
        <div class="flex items-center">
          <!-- Explicit value: without one, every radio in the group reports "on". -->
          <input type="radio" id="extract-full" name="extraction-mode" value="full" checked class="h-4 w-4 text-blue-600 focus:ring-blue-500 border-gray-600">
          <label for="extract-full" class="ml-2 block text-sm text-gray-300">Full text extraction</label>
        </div>
        <div class="flex items-center">
          <input type="radio" id="extract-key" name="extraction-mode" value="key" class="h-4 w-4 text-blue-600 focus:ring-blue-500 border-gray-600">
          <label for="extract-key" class="ml-2 block text-sm text-gray-300">Key sections only</label>
        </div>
      </div>
    </fieldset>
  </div>
</div>
<!--
  Pipeline step: Chunking Strategy.
  Contains the on/off switch (visually hidden checkbox + styled <span>) and a
  select for the chunk size.
-->
<div class="flex items-start bg-gray-600 bg-opacity-50 p-4 rounded-lg">
  <div class="flex-shrink-0 mt-1 mr-3">
    <div class="h-4 w-4 rounded-full bg-blue-500"></div>
  </div>
  <div class="flex-grow">
    <div class="flex justify-between items-center">
      <h3 class="font-medium" id="step-chunking-title">Chunking Strategy</h3>
      <label class="relative inline-flex items-center cursor-pointer">
        <!-- The label carries no text, so the checkbox takes its accessible name from the heading and its description from the hint paragraph. -->
        <input type="checkbox" checked class="sr-only peer" id="step-chunking-toggle" aria-labelledby="step-chunking-title" aria-describedby="step-chunking-hint">
        <!-- <span> rather than <div>: <label> may only contain phrasing content. The ring gives keyboard users a visible focus indicator for the hidden checkbox. -->
        <span class="w-11 h-6 bg-gray-700 peer-focus:outline-none peer-focus-visible:ring-2 peer-focus-visible:ring-blue-300 rounded-full peer peer-checked:after:translate-x-full peer-checked:after:border-white after:content-[''] after:absolute after:top-[2px] after:left-[2px] after:bg-white after:border-gray-300 after:border after:rounded-full after:h-5 after:w-5 after:transition-all peer-checked:bg-blue-600"></span>
      </label>
    </div>
    <p class="text-sm text-gray-400 mt-1" id="step-chunking-hint">Split documents into manageable chunks for processing</p>
    <div class="mt-3">
      <!-- for/id ties the visible label to the select so it is announced and clicking the label focuses the control. -->
      <label for="chunk-size" class="block text-sm font-medium text-gray-300 mb-1">Chunk Size</label>
      <select id="chunk-size" name="chunk-size" class="bg-gray-700 border border-gray-600 text-white text-sm rounded-lg focus:ring-blue-500 focus:border-blue-500 block w-full p-2.5">
        <option selected>Small (256 tokens)</option>
        <option>Medium (512 tokens)</option>
        <option>Large (1024 tokens)</option>
      </select>
    </div>
  </div>
</div>
<!--
  Pipeline step: Metadata Extraction.
  The on/off switch is a visually hidden checkbox (sr-only); the sibling <span>
  draws the track and knob and reacts to the checkbox state via Tailwind's
  `peer-*` variants.
-->
<div class="flex items-start bg-gray-600 bg-opacity-50 p-4 rounded-lg">
  <div class="flex-shrink-0 mt-1 mr-3">
    <div class="h-4 w-4 rounded-full bg-green-500"></div>
  </div>
  <div class="flex-grow">
    <div class="flex justify-between items-center">
      <h3 class="font-medium" id="step-metadata-title">Metadata Extraction</h3>
      <label class="relative inline-flex items-center cursor-pointer">
        <!-- The label carries no text, so the checkbox takes its accessible name from the heading and its description from the hint paragraph. -->
        <input type="checkbox" checked class="sr-only peer" id="step-metadata-toggle" aria-labelledby="step-metadata-title" aria-describedby="step-metadata-hint">
        <!-- <span> rather than <div>: <label> may only contain phrasing content. The ring gives keyboard users a visible focus indicator for the hidden checkbox. -->
        <span class="w-11 h-6 bg-gray-700 peer-focus:outline-none peer-focus-visible:ring-2 peer-focus-visible:ring-green-300 rounded-full peer peer-checked:after:translate-x-full peer-checked:after:border-white after:content-[''] after:absolute after:top-[2px] after:left-[2px] after:bg-white after:border-gray-300 after:border after:rounded-full after:h-5 after:w-5 after:transition-all peer-checked:bg-green-600"></span>
      </label>
    </div>
    <p class="text-sm text-gray-400 mt-1" id="step-metadata-hint">Extract document metadata (author, date, etc.)</p>
  </div>
</div>
</div>
</div>
<!-- "AI Models" card: selects for the embedding model and the LLM. -->
<div class="bg-gray-700 bg-opacity-50 rounded-lg p-6">
  <h2 class="text-xl font-semibold mb-4 flex items-center">
    <!-- Decorative icon; aria-hidden is copied onto the SVG that feather.replace() generates. -->
    <i data-feather="cpu" class="w-5 h-5 mr-2" aria-hidden="true"></i>
    AI Models
  </h2>
  <div class="space-y-4">
    <div class="bg-gray-600 bg-opacity-50 p-4 rounded-lg">
      <!-- for/id ties each label to its select; aria-describedby links the hint text below it. -->
      <label for="embedding-model" class="block text-sm font-medium text-gray-300 mb-2">Embedding Model</label>
      <select id="embedding-model" name="embedding-model" aria-describedby="embedding-model-hint" class="bg-gray-700 border border-gray-600 text-white text-sm rounded-lg focus:ring-blue-500 focus:border-blue-500 block w-full p-2.5">
        <option selected>OpenAI text-embedding-3-small</option>
        <option>OpenAI text-embedding-3-large</option>
        <option>HuggingFace all-MiniLM-L6-v2</option>
        <option>Cohere embed-english-v3.0</option>
      </select>
      <p class="text-xs text-gray-400 mt-1" id="embedding-model-hint">Model used to convert text into vector embeddings</p>
    </div>
    <div class="bg-gray-600 bg-opacity-50 p-4 rounded-lg">
      <label for="llm-model" class="block text-sm font-medium text-gray-300 mb-2">LLM Model</label>
      <select id="llm-model" name="llm-model" aria-describedby="llm-model-hint" class="bg-gray-700 border border-gray-600 text-white text-sm rounded-lg focus:ring-blue-500 focus:border-blue-500 block w-full p-2.5">
        <option selected>OpenAI GPT-4-turbo</option>
        <option>OpenAI GPT-3.5-turbo</option>
        <option>Anthropic Claude 3 Sonnet</option>
        <option>Mistral 7B</option>
      </select>
      <p class="text-xs text-gray-400 mt-1" id="llm-model-hint">Model used for text generation and analysis</p>
    </div>
  </div>
</div>
</div>
<div class="space-y-6">
<!-- "Save Configuration" card: name and description fields plus the save button. -->
<div class="bg-gray-700 bg-opacity-50 rounded-lg p-6">
  <h2 class="text-xl font-semibold mb-4 flex items-center">
    <!-- Decorative icon; aria-hidden is copied onto the SVG that feather.replace() generates. -->
    <i data-feather="save" class="w-5 h-5 mr-2" aria-hidden="true"></i>
    Save Configuration
  </h2>
  <p class="text-gray-400 mb-4">Save your pipeline configuration for future use</p>
  <div class="space-y-3">
    <div>
      <!-- for/id ties each visible label to its control; the placeholder alone is not a label. -->
      <label for="config-name" class="block text-sm font-medium text-gray-300 mb-1">Configuration Name</label>
      <input type="text" id="config-name" name="config-name" class="bg-gray-700 border border-gray-600 text-white text-sm rounded-lg focus:ring-blue-500 focus:border-blue-500 block w-full p-2.5" placeholder="My Pipeline Config">
    </div>
    <div>
      <label for="config-description" class="block text-sm font-medium text-gray-300 mb-1">Description</label>
      <textarea id="config-description" name="config-description" rows="3" class="bg-gray-700 border border-gray-600 text-white text-sm rounded-lg focus:ring-blue-500 focus:border-blue-500 block w-full p-2.5" placeholder="Describe this configuration..."></textarea>
    </div>
    <!-- Explicit type: a bare <button> defaults to "submit" and would submit any form it is later placed in. -->
    <button type="button" class="w-full mt-4 px-6 py-2 bg-indigo-600 hover:bg-indigo-700 rounded-md font-medium transition-colors">
      Save Configuration
    </button>
  </div>
</div>
<!-- "Document Types" card: one checkbox per file type the pipeline should accept. -->
<div class="bg-gray-700 bg-opacity-50 rounded-lg p-6">
  <h2 class="text-xl font-semibold mb-4 flex items-center" id="doc-types-heading">
    <!-- Decorative icon; aria-hidden is copied onto the SVG that feather.replace() generates. -->
    <i data-feather="file-text" class="w-5 h-5 mr-2" aria-hidden="true"></i>
    Document Types
  </h2>
  <p class="text-gray-400 mb-4" id="doc-types-hint">Select which document types this pipeline should process</p>
  <!-- The checkboxes form one named group: the visible heading is the group name and the paragraph its description. -->
  <div class="space-y-2" role="group" aria-labelledby="doc-types-heading" aria-describedby="doc-types-hint">
    <div class="flex items-center">
      <input id="pdf-checkbox" type="checkbox" checked class="w-4 h-4 text-blue-600 bg-gray-700 border-gray-600 rounded focus:ring-blue-500">
      <label for="pdf-checkbox" class="ml-2 text-sm font-medium text-gray-300">PDF</label>
    </div>
    <div class="flex items-center">
      <input id="docx-checkbox" type="checkbox" checked class="w-4 h-4 text-blue-600 bg-gray-700 border-gray-600 rounded focus:ring-blue-500">
      <label for="docx-checkbox" class="ml-2 text-sm font-medium text-gray-300">Word (DOCX)</label>
    </div>
    <div class="flex items-center">
      <input id="ppt-checkbox" type="checkbox" class="w-4 h-4 text-blue-600 bg-gray-700 border-gray-600 rounded focus:ring-blue-500">
      <label for="ppt-checkbox" class="ml-2 text-sm font-medium text-gray-300">PowerPoint</label>
    </div>
    <div class="flex items-center">
      <input id="xls-checkbox" type="checkbox" class="w-4 h-4 text-blue-600 bg-gray-700 border-gray-600 rounded focus:ring-blue-500">
      <label for="xls-checkbox" class="ml-2 text-sm font-medium text-gray-300">Excel</label>
    </div>
    <div class="flex items-center">
      <input id="txt-checkbox" type="checkbox" checked class="w-4 h-4 text-blue-600 bg-gray-700 border-gray-600 rounded focus:ring-blue-500">
      <label for="txt-checkbox" class="ml-2 text-sm font-medium text-gray-300">Plain Text</label>
    </div>
    <div class="flex items-center">
      <input id="image-checkbox" type="checkbox" class="w-4 h-4 text-blue-600 bg-gray-700 border-gray-600 rounded focus:ring-blue-500">
      <label for="image-checkbox" class="ml-2 text-sm font-medium text-gray-300">Images (JPG, PNG)</label>
    </div>
  </div>
</div>
</div>
</div>
</div>
</main>
<custom-footer></custom-footer>
<script src="components/navbar.js"></script>
<script src="components/footer.js"></script>
<script src="script.js"></script>
<script>
  // Replace every <i data-feather="..."> placeholder with its inline SVG icon.
  feather.replace();

  // Highlight the navigation link that points at the current page.
  // NOTE(review): the .nav-link elements are not in this file; presumably they
  // are rendered by <custom-navbar>. If that component uses a shadow root,
  // document.querySelectorAll will not reach them - confirm.
  document.addEventListener('DOMContentLoaded', () => {
    const currentPath = window.location.pathname;

    document.querySelectorAll('.nav-link').forEach(link => {
      const href = link.getAttribute('href');
      let isCurrent = false;

      // Skip missing hrefs and pure fragment links ("#..."): they would
      // otherwise resolve to the current page and always match.
      if (href && !href.startsWith('#')) {
        try {
          // Resolve against the page URL so a relative href such as
          // "configure.html" compares equal to the pathname "/configure.html".
          // The raw attribute only matched when it was written as an absolute path.
          const target = new URL(href, window.location.href);
          isCurrent = target.origin === window.location.origin && target.pathname === currentPath;
        } catch (err) {
          // Unparseable href: treat it as not pointing at the current page.
          isCurrent = false;
        }
      }

      link.classList.toggle('active', isCurrent);

      // Expose the same state to assistive technology.
      if (isCurrent) {
        link.setAttribute('aria-current', 'page');
      } else {
        link.removeAttribute('aria-current');
      }
    });
  });
</script>
</body>
</html>