Spaces:
Running
Running
| <!doctype html> | |
| <html lang="en"> | |
| <head> | |
| <meta charset="UTF-8"> | |
| <meta name="viewport" content="width=device-width, initial-scale=1.0"> | |
| <title>DocExtractor Pro - Convert Files to Text Formats</title> | |
| <link rel="stylesheet" href="style.css"> | |
| <!-- Styling and icons. Feather is loaded once; the inline feather.replace() call at the end of the body depends on it. --> | |
| <script src="https://cdn.tailwindcss.com"></script> | |
| <script src="https://cdn.jsdelivr.net/npm/feather-icons/dist/feather.min.js"></script> | |
| <!-- Enhanced PDF Processing --> | |
| <script src="https://cdnjs.cloudflare.com/ajax/libs/pdf.js/3.11.174/pdf.min.js"></script> | |
| <script src="https://cdnjs.cloudflare.com/ajax/libs/pdf-lib/1.17.1/pdf-lib.min.js"></script> | |
| <!-- Enhanced Document Processing --> | |
| <script src="https://cdnjs.cloudflare.com/ajax/libs/mammoth/1.6.0/mammoth.browser.min.js"></script> | |
| <script src="https://cdnjs.cloudflare.com/ajax/libs/xlsx/0.18.5/xlsx.full.min.js"></script> | |
| <!-- Enhanced OCR with Multiple Languages --> | |
| <script src="https://cdn.jsdelivr.net/npm/tesseract.js@4/dist/tesseract.min.js"></script> | |
| <!-- NOTE(review): ocr-space-api looks like a Node package; confirm this URL serves a browser build. --> | |
| <script src="https://unpkg.com/ocr-space-api@1.0.0/dist/ocr-space-api.min.js"></script> | |
| <!-- Image Processing --> | |
| <script src="https://cdnjs.cloudflare.com/ajax/libs/cropperjs/1.6.1/cropper.min.js"></script> | |
| <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/cropperjs/1.6.1/cropper.min.css"> | |
| <!-- Additional Language Support --> | |
| <script src="https://cdn.jsdelivr.net/npm/he@1.2.0/he.js"></script> | |
| <!-- NOTE(review): confirm iconv-lite is actually published at this cdnjs path. --> | |
| <script src="https://cdnjs.cloudflare.com/ajax/libs/iconv-lite/0.6.3/iconv-lite.min.js"></script> | |
| <!-- Application logic. Deferred so it executes after the document is parsed, when the elements in the body exist; it still runs after every library above. --> | |
| <script src="script.js" defer></script> | |
| </head> | |
| <body class="bg-gray-50 min-h-screen"> | |
| <!-- Custom element; presumably defined by components/navbar.js loaded at the end of the body. --> | |
| <custom-navbar></custom-navbar> | |
| <main class="container mx-auto px-4 py-8"> | |
| <section class="max-w-4xl mx-auto bg-white rounded-xl shadow-md overflow-hidden p-6 mb-8"> | |
| <h1 class="text-3xl font-bold text-gray-800 mb-4">Document Extractor</h1> | |
| <p class="text-gray-600 mb-6">Upload PDF, Word, or Excel files to extract text with formatting, tables, and images (OCR). Convert to JSON or Markdown formats.</p> | |
| <!-- Upload area: the native file input is visually hidden; the button is presumably wired to open it by script.js (confirm). --> | |
| <div class="border-2 border-dashed border-gray-300 rounded-lg p-8 text-center mb-6"> | |
| <input type="file" id="fileInput" class="hidden" accept=".pdf,.docx,.xlsx,.xls,.doc,.png,.jpg,.jpeg" multiple aria-label="Choose files to upload"> | |
| <button type="button" id="uploadBtn" class="bg-indigo-600 hover:bg-indigo-700 text-white font-medium py-3 px-6 rounded-lg transition duration-200 flex items-center mx-auto"> | |
| <i data-feather="upload" class="mr-2" aria-hidden="true"></i> Choose Files | |
| </button> | |
| <p class="text-gray-500 mt-3">Supported formats: PDF, DOCX, XLSX, JPG, PNG</p> | |
| </div> | |
| <!-- Selected-file list: starts hidden and empty; no markup here fills it, so it is presumably populated by script. --> | |
| <div id="filePreviewContainer" class="hidden"> | |
| <h2 class="text-xl font-semibold text-gray-800 mb-4">Selected Files</h2> | |
| <div id="filePreviewList" class="space-y-3 mb-6"></div> | |
| </div> | |
| <!-- Output format picker and the action that starts processing. --> | |
| <div class="flex flex-col sm:flex-row gap-4 mb-6"> | |
| <div class="w-full"> | |
| <label class="block text-gray-700 font-medium mb-2" for="outputFormat">Output Format</label> | |
| <select id="outputFormat" class="w-full p-3 border border-gray-300 rounded-lg focus:ring-2 focus:ring-indigo-500 focus:border-indigo-500"> | |
| <option value="json">JSON</option> | |
| <option value="markdown">Markdown</option> | |
| <option value="text">Plain Text</option> | |
| <option value="formatted">Formatted Text</option> | |
| </select> | |
| </div> | |
| <button type="button" id="processBtn" class="bg-indigo-600 hover:bg-indigo-700 text-white font-medium py-3 px-6 rounded-lg transition duration-200 flex items-center justify-center mt-6 sm:mt-auto"> | |
| <i data-feather="cpu" class="mr-2" aria-hidden="true"></i> Process Files | |
| </button> | |
| </div> | |
| <!-- Results: starts hidden with an empty container; presumably revealed and filled by script once files are processed. --> | |
| <div id="resultsSection" class="hidden"> | |
| <div class="flex justify-between items-center mb-4"> | |
| <h2 class="text-xl font-semibold text-gray-800">Extracted Content</h2> | |
| <button type="button" id="downloadAllBtn" class="bg-green-600 hover:bg-green-700 text-white font-medium py-2 px-4 rounded-lg transition duration-200 flex items-center"> | |
| <i data-feather="download" class="mr-2" aria-hidden="true"></i> Download All | |
| </button> | |
| </div> | |
| <div id="resultsContainer" class="space-y-6"></div> | |
| </div> | |
| </section> | |
| </main> | |
| <custom-footer></custom-footer> | |
| <script src="components/navbar.js"></script> | |
| <script src="components/footer.js"></script> | |
| <script> | |
| // Swap every element carrying a data-feather attribute for its inline SVG icon. | |
| feather.replace(); | |
| </script> | |
| <script src="https://huggingface.co/deepsite/deepsite-badge.js"></script> | |
| </body> | |
| </html> | |