File size: 4,986 Bytes
5d66d3c
 
 
 
 
 
 
 
 
 
e010252
 
 
 
 
 
5d66d3c
e010252
 
 
 
 
 
 
 
 
 
 
 
 
5d66d3c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3a12466
 
5d66d3c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
<!DOCTYPE html>
<html lang="en">
<head>
    <!-- Document metadata and local stylesheet -->
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>DocExtractor Pro - Convert Files to Text Formats</title>
    <link rel="stylesheet" href="style.css">

    <!-- Styling and icons.
         Feather is loaded exactly once: it defines the global `feather` object
         that the inline feather.replace() call at the end of <body> relies on.
         A second copy from another CDN would only redefine the same global. -->
    <script src="https://cdn.tailwindcss.com"></script>
    <script src="https://cdn.jsdelivr.net/npm/feather-icons/dist/feather.min.js"></script>

    <!-- Enhanced PDF Processing -->
    <script src="https://cdnjs.cloudflare.com/ajax/libs/pdf.js/3.11.174/pdf.min.js"></script>
    <script src="https://cdnjs.cloudflare.com/ajax/libs/pdf-lib/1.17.1/pdf-lib.min.js"></script>

    <!-- Enhanced Document Processing -->
    <script src="https://cdnjs.cloudflare.com/ajax/libs/mammoth/1.6.0/mammoth.browser.min.js"></script>
    <script src="https://cdnjs.cloudflare.com/ajax/libs/xlsx/0.18.5/xlsx.full.min.js"></script>

    <!-- Enhanced OCR with Multiple Languages -->
    <script src="https://cdn.jsdelivr.net/npm/tesseract.js@4/dist/tesseract.min.js"></script>
    <!-- NOTE(review): confirm this URL actually serves a browser build; verify it does not 404 -->
    <script src="https://unpkg.com/ocr-space-api@1.0.0/dist/ocr-space-api.min.js"></script>

    <!-- Image Processing (script plus its companion stylesheet) -->
    <script src="https://cdnjs.cloudflare.com/ajax/libs/cropperjs/1.6.1/cropper.min.js"></script>
    <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/cropperjs/1.6.1/cropper.min.css">

    <!-- Additional Language Support -->
    <script src="https://cdn.jsdelivr.net/npm/he@1.2.0/he.js"></script>
    <!-- NOTE(review): confirm cdnjs hosts this library at this path; verify it does not 404 -->
    <script src="https://cdnjs.cloudflare.com/ajax/libs/iconv-lite/0.6.3/iconv-lite.min.js"></script>

    <!-- Application logic.
         NOTE(review): this runs synchronously before <body> is parsed, so the page
         elements do not exist yet when it executes. Confirm script.js waits for
         DOMContentLoaded before querying them; if it does not, add `defer`. -->
    <script src="script.js"></script>
</head>
<body class="bg-gray-50 min-h-screen">
    <custom-navbar></custom-navbar>
    
    <main class="container mx-auto px-4 py-8">
        <section class="max-w-4xl mx-auto bg-white rounded-xl shadow-md overflow-hidden p-6 mb-8">
            <h1 class="text-3xl font-bold text-gray-800 mb-4">Document Extractor</h1>
            <p class="text-gray-600 mb-6">Upload PDF, Word, or Excel files to extract text with formatting, tables, and images (OCR). Convert to JSON or Markdown formats.</p>
            
            <!-- Upload area: a visually hidden file input (Tailwind `hidden`) plus a visible
                 trigger button. Presumably script.js forwards clicks on #uploadBtn to
                 #fileInput; confirm against script.js. -->
            <div class="border-2 border-dashed border-gray-300 rounded-lg p-8 text-center mb-6">
                <input type="file" id="fileInput" class="hidden" accept=".pdf,.docx,.xlsx,.xls,.doc,.png,.jpg,.jpeg" multiple>
                <!-- Explicit type="button" so the control never acts as a submit button if it is later wrapped in a form -->
                <button type="button" id="uploadBtn" class="bg-indigo-600 hover:bg-indigo-700 text-white font-medium py-3 px-6 rounded-lg transition duration-200 flex items-center mx-auto">
                    <!-- Decorative icon placeholder; the button text already names the action -->
                    <i data-feather="upload" class="mr-2" aria-hidden="true"></i> Choose Files
                </button>
                <!-- NOTE(review): the `accept` list above also allows .doc, .xls and .jpeg, which this
                     text does not mention; confirm which formats are really supported and align the two. -->
                <p class="text-gray-500 mt-3">Supported formats: PDF, DOCX, XLSX, JPG, PNG</p>
            </div>
            
            <!-- Selected-files panel: hidden in the initial markup (Tailwind `hidden`);
                 the list element below ships empty. -->
            <div
                id="filePreviewContainer"
                class="hidden"
            >
                <h2 class="text-xl font-semibold text-gray-800 mb-4">Selected Files</h2>
                <div
                    id="filePreviewList"
                    class="space-y-3 mb-6"
                ></div>
            </div>
            
            <!-- Output options: format selector (stacked on small screens, side by side from `sm`)
                 and the button that starts processing. -->
            <div class="flex flex-col sm:flex-row gap-4 mb-6">
                <div class="w-full">
                    <label class="block text-gray-700 font-medium mb-2" for="outputFormat">Output Format</label>
                    <select id="outputFormat" class="w-full p-3 border border-gray-300 rounded-lg focus:ring-2 focus:ring-indigo-500 focus:border-indigo-500">
                        <option value="json">JSON</option>
                        <option value="markdown">Markdown</option>
                        <option value="text">Plain Text</option>
                        <option value="formatted">Formatted Text</option>
                    </select>
                </div>
                <!-- Explicit type="button" so the control never acts as a submit button if it is later wrapped in a form -->
                <button type="button" id="processBtn" class="bg-indigo-600 hover:bg-indigo-700 text-white font-medium py-3 px-6 rounded-lg transition duration-200 flex items-center justify-center mt-6 sm:mt-auto">
                    <!-- Decorative icon placeholder; the button text already names the action -->
                    <i data-feather="cpu" class="mr-2" aria-hidden="true"></i> Process Files
                </button>
            </div>
            
            <!-- Results panel: hidden in the initial markup (Tailwind `hidden`);
                 #resultsContainer ships empty. -->
            <div id="resultsSection" class="hidden">
                <div class="flex justify-between items-center mb-4">
                    <h2 class="text-xl font-semibold text-gray-800">Extracted Content</h2>
                    <!-- Explicit type="button" so the control never acts as a submit button if it is later wrapped in a form -->
                    <button type="button" id="downloadAllBtn" class="bg-green-600 hover:bg-green-700 text-white font-medium py-2 px-4 rounded-lg transition duration-200 flex items-center">
                        <!-- Decorative icon placeholder; the button text already names the action -->
                        <i data-feather="download" class="mr-2" aria-hidden="true"></i> Download All
                    </button>
                </div>
                <div id="resultsContainer" class="space-y-6"></div>
            </div>
        </section>
    </main>

    <custom-footer></custom-footer>
    
    <!-- Component scripts. Presumably these register the <custom-navbar> and
         <custom-footer> elements used in this page; confirm in the component files. -->
    <script src="components/navbar.js"></script>
    <script src="components/footer.js"></script>
    <!-- Runs after the markup above has been parsed, so every data-feather placeholder
         in the page already exists when Feather is asked to replace them. -->
    <script>
        feather.replace();
    </script>
<!-- Third-party badge script, loaded last -->
<script src="https://huggingface.co/deepsite/deepsite-badge.js"></script>
</body>
</html>