Spaces:
Running
Running
File size: 4,986 Bytes
5d66d3c e010252 5d66d3c e010252 5d66d3c 3a12466 5d66d3c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 |
<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="UTF-8">
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
  <title>DocExtractor Pro - Convert Files to Text Formats</title>
  <link rel="stylesheet" href="style.css">

  <!-- Styling and icons: Tailwind (CDN build) and Feather Icons.
       Feather is loaded exactly once; it defines the global `feather`
       object that the inline script at the end of <body> calls. -->
  <script src="https://cdn.tailwindcss.com"></script>
  <script src="https://cdn.jsdelivr.net/npm/feather-icons/dist/feather.min.js"></script>

  <!-- PDF processing: pdf.js 3.11.174 and pdf-lib 1.17.1 -->
  <script src="https://cdnjs.cloudflare.com/ajax/libs/pdf.js/3.11.174/pdf.min.js"></script>
  <script src="https://cdnjs.cloudflare.com/ajax/libs/pdf-lib/1.17.1/pdf-lib.min.js"></script>

  <!-- Office documents: mammoth 1.6.0 (Word) and SheetJS xlsx 0.18.5 (spreadsheets) -->
  <script src="https://cdnjs.cloudflare.com/ajax/libs/mammoth/1.6.0/mammoth.browser.min.js"></script>
  <script src="https://cdnjs.cloudflare.com/ajax/libs/xlsx/0.18.5/xlsx.full.min.js"></script>

  <!-- OCR: tesseract.js v4 plus an OCR.space API client.
       NOTE(review): confirm that the ocr-space-api package actually ships a
       browser bundle at this dist path. -->
  <script src="https://cdn.jsdelivr.net/npm/tesseract.js@4/dist/tesseract.min.js"></script>
  <script src="https://unpkg.com/ocr-space-api@1.0.0/dist/ocr-space-api.min.js"></script>

  <!-- Image processing: Cropper.js 1.6.1 (script and its stylesheet) -->
  <script src="https://cdnjs.cloudflare.com/ajax/libs/cropperjs/1.6.1/cropper.min.js"></script>
  <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/cropperjs/1.6.1/cropper.min.css">

  <!-- Text handling: he 1.2.0 (HTML entities) and iconv-lite 0.6.3 (character encodings).
       NOTE(review): confirm that iconv-lite is published on cdnjs at this URL. -->
  <script src="https://cdn.jsdelivr.net/npm/he@1.2.0/he.js"></script>
  <script src="https://cdnjs.cloudflare.com/ajax/libs/iconv-lite/0.6.3/iconv-lite.min.js"></script>

  <!-- Application logic.
       NOTE(review): this script is loaded synchronously in <head>, so it runs
       before any <body> element exists. Confirm that script.js waits for
       DOMContentLoaded (or similar) before looking up elements by id. -->
  <script src="script.js"></script>
</head>
<body class="bg-gray-50 min-h-screen">
  <!-- Custom element; presumably registered by components/navbar.js (loaded below) — confirm. -->
  <custom-navbar></custom-navbar>

  <main class="container mx-auto px-4 py-8">
    <section class="max-w-4xl mx-auto bg-white rounded-xl shadow-md overflow-hidden p-6 mb-8">
      <h1 class="text-3xl font-bold text-gray-800 mb-4">Document Extractor</h1>
      <p class="text-gray-600 mb-6">Upload PDF, Word, or Excel files to extract text with formatting, tables, and images (OCR). Convert to JSON or Markdown formats.</p>

      <!-- Upload area. The file input carries the `hidden` class, so the visible
           button is the user's entry point; presumably script.js forwards the
           click to #fileInput — confirm.
           NOTE(review): `accept` also lists .doc and .xls, which the
           "Supported formats" hint below does not mention — confirm which
           formats are really handled. -->
      <div class="border-2 border-dashed border-gray-300 rounded-lg p-8 text-center mb-6">
        <input type="file" id="fileInput" class="hidden" accept=".pdf,.docx,.xlsx,.xls,.doc,.png,.jpg,.jpeg" multiple>
        <!-- type="button": explicit so the button never acts as a submit control.
             aria-hidden on the icon: it is decorative, the text label names the button. -->
        <button id="uploadBtn" type="button" class="bg-indigo-600 hover:bg-indigo-700 text-white font-medium py-3 px-6 rounded-lg transition duration-200 flex items-center mx-auto">
          <i data-feather="upload" class="mr-2" aria-hidden="true"></i> Choose Files
        </button>
        <p class="text-gray-500 mt-3">Supported formats: PDF, DOCX, XLSX, JPG, PNG</p>
      </div>

      <!-- List of selected files; starts with the `hidden` class and an empty list,
           presumably filled and revealed by script.js — confirm. -->
      <div id="filePreviewContainer" class="hidden">
        <h2 class="text-xl font-semibold text-gray-800 mb-4">Selected Files</h2>
        <div id="filePreviewList" class="space-y-3 mb-6"></div>
      </div>

      <!-- Output format selector and the button that starts processing. -->
      <div class="flex flex-col sm:flex-row gap-4 mb-6">
        <div class="w-full">
          <label class="block text-gray-700 font-medium mb-2" for="outputFormat">Output Format</label>
          <select id="outputFormat" class="w-full p-3 border border-gray-300 rounded-lg focus:ring-2 focus:ring-indigo-500 focus:border-indigo-500">
            <option value="json">JSON</option>
            <option value="markdown">Markdown</option>
            <option value="text">Plain Text</option>
            <option value="formatted">Formatted Text</option>
          </select>
        </div>
        <button id="processBtn" type="button" class="bg-indigo-600 hover:bg-indigo-700 text-white font-medium py-3 px-6 rounded-lg transition duration-200 flex items-center justify-center mt-6 sm:mt-auto">
          <i data-feather="cpu" class="mr-2" aria-hidden="true"></i> Process Files
        </button>
      </div>

      <!-- Results area; starts with the `hidden` class and an empty container,
           presumably populated by script.js after processing — confirm. -->
      <div id="resultsSection" class="hidden">
        <div class="flex justify-between items-center mb-4">
          <h2 class="text-xl font-semibold text-gray-800">Extracted Content</h2>
          <button id="downloadAllBtn" type="button" class="bg-green-600 hover:bg-green-700 text-white font-medium py-2 px-4 rounded-lg transition duration-200 flex items-center">
            <i data-feather="download" class="mr-2" aria-hidden="true"></i> Download All
          </button>
        </div>
        <div id="resultsContainer" class="space-y-6"></div>
      </div>
    </section>
  </main>

  <!-- Custom element; presumably registered by components/footer.js (loaded below) — confirm. -->
  <custom-footer></custom-footer>

  <script src="components/navbar.js"></script>
  <script src="components/footer.js"></script>
  <script>
    // Swap every data-feather placeholder present in the document at this point
    // for its inline SVG icon. Placeholders inserted later need another call.
    feather.replace();
  </script>
  <!-- Third-party badge script served from huggingface.co. -->
  <script src="https://huggingface.co/deepsite/deepsite-badge.js"></script>
</body>
</html>