Spaces:
Running
Running
| <html lang="en"> | |
| <head> | |
| <meta charset="UTF-8"> | |
| <meta name="viewport" content="width=device-width, initial-scale=1.0"> | |
| <meta name="theme-color" content="#000000"> | |
| <meta name="description" content="Convert documents to clean Markdown format"> | |
| <title>Basic Document Converter | PDF, EPUB, DOCX & PPTX to Markdown</title> | |
| <link rel="manifest" href="/manifest.webmanifest"> | |
| <script src="https://cdn.tailwindcss.com"></script> | |
| <script src="https://cdnjs.cloudflare.com/ajax/libs/pdf.js/2.12.313/pdf.min.js"></script> | |
| <script src="https://cdnjs.cloudflare.com/ajax/libs/showdown/2.1.0/showdown.min.js"></script> | |
| <script src="https://cdn.jsdelivr.net/npm/jszip@3.10.1/dist/jszip.min.js"></script> | |
| <script src="https://cdnjs.cloudflare.com/ajax/libs/mammoth/1.4.0/mammoth.browser.min.js"></script> | |
| <script src="https://cdn.jsdelivr.net/npm/pptx2md@1.4.1/dist/pptx2md.min.js"></script> | |
| <script src="https://kit.fontawesome.com/a076d05399.js" crossorigin="anonymous"></script> | |
| <style> | |
/*
 * Page stylesheet for the "neo-brutalist" look: hard black borders, offset
 * shadows, no rounded corners.
 *
 * Space Grotesk is the UI face. Space Mono is what #markdownOutput asks for,
 * so it is loaded here as well; previously it was never requested and the
 * output pane silently fell back to the generic monospace font.
 */
@import url('https://fonts.googleapis.com/css2?family=Space+Grotesk:wght@400;500;700&family=Space+Mono:wght@400;700&display=swap');

body {
  font-family: 'Space Grotesk', sans-serif;
  background-color: #F5F5F5;
}

/* Border / shadow helpers */
.neo-border {
  border: 3px solid #000;
  box-shadow: 8px 8px 0 #000;
}
.neo-border-thick {
  border: 4px solid #000;
}
.neo-border-thin {
  border: 2px solid #000;
}
.neo-tab-active {
  border-bottom: 4px solid #000;
  font-weight: 700;
}

/* Progress bar: the inner bar's width is set inline by the script */
.neo-progress {
  background-color: #E0E0E0;
  border: 2px solid #000;
}
.neo-progress-bar {
  background-color: #000;
}

/* Drop target; `.active` is toggled while a file is dragged over it */
.dropzone {
  border: 3px dashed #000;
  transition: all 0.2s;
}
.dropzone.active {
  background-color: #FFF0F0;
}

/* Preview / output panes */
.document-preview-container {
  display: grid;
  grid-template-columns: 1fr;
  gap: 1rem;
  height: 500px;
}
.document-preview {
  border: 3px solid #000;
  background-color: white;
  overflow-y: auto;
  height: 100%;
}
.page-canvas {
  border: 2px solid #000;
  margin-bottom: 1rem;
  max-width: 100%;
}

/* Buttons */
.neo-btn {
  border: 3px solid #000;
  font-weight: 700;
  letter-spacing: -0.5px;
  transition: all 0.2s;
}
.neo-btn:hover {
  transform: translate(-2px, -2px);
  box-shadow: 4px 4px 0 #000;
}
.neo-btn:active {
  transform: translate(0, 0);
  box-shadow: none;
}
.neo-btn-primary {
  background-color: #000;
  color: white;
}
.neo-btn-secondary {
  background-color: white;
  color: black;
}

/* Custom checkbox: native rendering is removed and redrawn as a black box */
.neo-checkbox {
  -webkit-appearance: none;
  -moz-appearance: none;
  appearance: none;
  width: 20px;
  height: 20px;
  border: 3px solid #000;
  margin-right: 8px;
  position: relative;
  top: 4px;
}
.neo-checkbox:checked {
  background-color: #000;
  background-image: url("data:image/svg+xml,%3Csvg xmlns='http://www.w3.org/2000/svg' width='16' height='16' fill='white' viewBox='0 0 16 16'%3E%3Cpath d='M12.736 3.97a.733.733 0 0 1 1.047 0c.286.289.29.756.01 1.05L7.88 12.01a.733.733 0 0 1-1.065.02L3.217 8.384a.757.757 0 0 1 0-1.06.733.733 0 0 1 1.047 0l3.052 3.093 5.4-6.425a.247.247 0 0 1 .02-.022Z'/%3E%3C/svg%3E");
  background-repeat: no-repeat;
  background-position: center;
}

/* Markdown output pane */
#markdownOutput {
  font-family: 'Space Mono', monospace;
  white-space: pre-wrap;
  background-color: white;
  border: 3px solid #000;
  padding: 1rem;
  height: 100%;
  overflow-y: auto;
}

/* Floating PWA install button */
#installBtn {
  position: fixed;
  bottom: 1rem;
  right: 1rem;
  z-index: 100;
}

/* Per-item preview cards (EPUB items, DOCX pages, PPTX slides) */
.preview-slide {
  width: 100%;
  background-color: white;
  padding: 1rem;
  border: 2px solid #000;
  margin-bottom: 1rem;
}

/* Selected-file summary row */
.file-info {
  display: flex;
  align-items: center;
  gap: 0.5rem;
  margin-bottom: 1rem;
}
.file-icon {
  font-size: 1.5rem;
}

/* Small screens: tighter padding, smaller shadow, shorter panes */
@media (max-width: 768px) {
  .container {
    padding: 1rem;
  }
  .neo-border {
    box-shadow: 4px 4px 0 #000;
  }
  .document-preview-container {
    grid-template-columns: 1fr;
    height: auto;
  }
  .document-preview {
    height: 300px;
  }
}
| </style> | |
| </head> | |
| <body class="min-h-screen"> | |
<!--
  Primary page content. Every id and class the script looks up is unchanged;
  only semantics were tightened: a <main> landmark, section headings at h2
  (no h1 -> h3 jump), explicit button types, decorative icons hidden from
  assistive technology, and a status region for progress messages.
-->
<main class="container mx-auto px-4 py-12 max-w-6xl">
  <div class="text-center mb-12">
    <h1 class="text-4xl font-bold mb-4 tracking-tight">BASIC DOCUMENT CONVERTER</h1>
    <p class="text-xl">TRANSFORM PDF, EPUB, DOCX &amp; PPTX TO CLEAN MARKDOWN</p>
  </div>
  <div class="neo-border bg-white mb-8">
    <div class="p-8">
      <div class="grid grid-cols-1 lg:grid-cols-2 gap-8">
        <!-- Upload Section -->
        <div>
          <div id="dropzone" class="dropzone rounded-none p-12 text-center cursor-pointer mb-8">
            <div id="uploadContent" class="flex flex-col items-center justify-center">
              <i class="fas fa-file-upload text-5xl mb-4" aria-hidden="true"></i>
              <h2 class="text-xl font-bold mb-2">DRAG &amp; DROP ANY DOCUMENT HERE</h2>
              <p class="mb-6">SUPPORTS PDF, EPUB, DOCX &amp; PPTX</p>
              <input type="file" id="fileInput" accept=".pdf,.epub,.docx,.pptx" class="hidden">
              <button type="button" id="browseBtn" class="neo-btn neo-btn-primary px-6 py-3">
                SELECT DOCUMENT
              </button>
            </div>
          </div>
          <!-- File Info -->
          <div id="fileInfoContainer" class="neo-border-thin p-4 mb-4 bg-white hidden">
            <div class="file-info">
              <i id="fileIcon" class="file-icon" aria-hidden="true"></i>
              <div>
                <h3 id="fileName" class="font-bold"></h3>
                <p id="fileType" class="text-sm"></p>
              </div>
            </div>
          </div>
          <!-- Options -->
          <div class="neo-border-thin p-6 mb-8 bg-white">
            <h2 class="font-bold text-lg mb-4">CONVERSION OPTIONS</h2>
            <div class="space-y-4">
              <div class="flex items-start">
                <input type="checkbox" id="preserveLayout" class="neo-checkbox" checked>
                <label for="preserveLayout" class="text-base">PRESERVE LAYOUT STRUCTURE</label>
              </div>
              <div class="flex items-start">
                <input type="checkbox" id="detectHeadings" class="neo-checkbox" checked>
                <label for="detectHeadings" class="text-base">AUTO-DETECT HEADINGS</label>
              </div>
              <div class="flex items-start">
                <input type="checkbox" id="includeMetadata" class="neo-checkbox" checked>
                <label for="includeMetadata" class="text-base">INCLUDE DOCUMENT METADATA</label>
              </div>
            </div>
          </div>
          <!-- Progress -->
          <div id="progressContainer" class="hidden">
            <div class="flex justify-between mb-2">
              <span class="font-bold">CONVERSION PROGRESS</span>
              <span id="progressPercent" class="font-bold">0%</span>
            </div>
            <div class="neo-progress w-full h-3 mb-2">
              <div id="progressBar" class="neo-progress-bar h-full" style="width: 0%"></div>
            </div>
            <!-- role="status" lets screen readers hear progress and completion messages -->
            <p id="progressText" class="font-medium" role="status">PROCESSING DOCUMENT...</p>
          </div>
        </div>
        <!-- Preview Section -->
        <div>
          <div class="flex justify-between items-center mb-4">
            <h2 class="font-bold text-lg">MARKDOWN OUTPUT</h2>
            <div class="flex space-x-3">
              <button type="button" id="copyBtn" class="neo-btn neo-btn-secondary px-4 py-2 text-sm hidden">
                <i class="fas fa-copy mr-1" aria-hidden="true"></i> COPY
              </button>
              <button type="button" id="downloadBtn" class="neo-btn neo-btn-primary px-4 py-2 text-sm hidden">
                <i class="fas fa-download mr-1" aria-hidden="true"></i> DOWNLOAD
              </button>
            </div>
          </div>
          <div class="document-preview-container">
            <div id="documentPreview" class="document-preview p-4">
              <div id="previewContent" class="flex flex-col items-center"></div>
            </div>
            <div id="markdownOutput" class="document-preview p-4"></div>
          </div>
        </div>
      </div>
    </div>
  </div>
  <div class="text-center font-medium">
    <p>THIS TOOL WORKS ENTIRELY IN YOUR BROWSER. YOUR FILES ARE NEVER UPLOADED TO ANY SERVER.</p>
  </div>
</main>
<!-- Install button (hidden by default; revealed by the beforeinstallprompt handler) -->
<button type="button" id="installBtn" class="neo-btn neo-btn-primary px-6 py-3 hidden">
  <i class="fas fa-download mr-2" aria-hidden="true"></i> INSTALL APP
</button>
| <!-- Service Worker Registration --> | |
| <script> | |
// Point PDF.js at its worker script.
// Guarded: if pdf.min.js failed to load (CDN blocked or offline), an unguarded
// reference to `pdfjsLib` would throw here and abort the rest of this script,
// taking the EPUB/DOCX/PPTX converters down with it.
if (typeof pdfjsLib !== 'undefined') {
  pdfjsLib.GlobalWorkerOptions.workerSrc = 'https://cdnjs.cloudflare.com/ajax/libs/pdf.js/2.12.313/pdf.worker.min.js';
} else {
  console.warn('PDF.js did not load; PDF conversion will be unavailable.');
}
// PWA installation flow.
// `deferredPrompt` keeps the intercepted `beforeinstallprompt` event so the
// prompt can be shown later, when the user presses the install button.
let deferredPrompt;
const installBtn = document.getElementById('installBtn');

// Hides the install button and forgets the stored prompt event.
const resetInstallUi = () => {
  installBtn.classList.add('hidden');
  deferredPrompt = null;
};

window.addEventListener('beforeinstallprompt', (promptEvent) => {
  promptEvent.preventDefault();
  deferredPrompt = promptEvent;
  installBtn.classList.remove('hidden');
});

installBtn.addEventListener('click', async () => {
  if (deferredPrompt) {
    deferredPrompt.prompt();
    // Wait for the user's decision; the button is hidden either way.
    await deferredPrompt.userChoice;
    resetInstallUi();
  }
});

window.addEventListener('appinstalled', () => resetInstallUi());

// Log when the page is already running as an installed app.
const isStandalone =
  window.matchMedia('(display-mode: standalone)').matches || window.navigator.standalone;
if (isStandalone) {
  console.log('Running as PWA');
}
// Register /sw.js once the page has finished loading, where supported.
if ('serviceWorker' in navigator) {
  const onSwRegistered = () => {
    console.log('ServiceWorker registration successful');
  };
  const onSwFailed = (err) => {
    console.log('ServiceWorker registration failed: ', err);
  };
  window.addEventListener('load', () => {
    navigator.serviceWorker.register('/sw.js').then(onSwRegistered).catch(onSwFailed);
  });
}
// Generate the web app manifest at runtime and attach it to the document.
//
// Two fixes compared with simply appending a new <link>:
//  * URLs inside the manifest are made absolute. The manifest is served from a
//    blob: URL, so relative members such as "/" or "icon-192x192.png" have no
//    usable base to resolve against.
//  * An existing <link rel="manifest"> is re-pointed rather than adding a
//    second one; a second manifest link later in the head would not be the
//    one the browser picks up.
// NOTE(review): installability from a blob: manifest varies by browser —
// verify on the target browsers.
const toAbsoluteUrl = (path) => new URL(path, document.baseURI).href;

const manifest = {
  "name": "Universal Document Converter",
  "short_name": "DocConvert",
  "description": "Convert documents to clean Markdown format",
  "start_url": toAbsoluteUrl('/'),
  "display": "standalone",
  "background_color": "#F5F5F5",
  "theme_color": "#000000",
  "icons": [
    { "src": "icon-192x192.png", "sizes": "192x192", "type": "image/png" },
    { "src": "icon-512x512.png", "sizes": "512x512", "type": "image/png" },
    { "src": "icon-maskable-192x192.png", "sizes": "192x192", "type": "image/png", "purpose": "maskable" },
    { "src": "icon-maskable-512x512.png", "sizes": "512x512", "type": "image/png", "purpose": "maskable" }
  ].map((icon) => Object.assign({}, icon, { "src": toAbsoluteUrl(icon.src) }))
};

// Serialize the manifest into a blob URL.
const manifestBlob = new Blob([JSON.stringify(manifest)], { type: 'application/manifest+json' });
const manifestUrl = URL.createObjectURL(manifestBlob);

// Reuse the page's manifest link when there is one, otherwise create it.
const manifestLink =
  document.querySelector('link[rel="manifest"]') ||
  document.head.appendChild(document.createElement('link'));
manifestLink.rel = 'manifest';
manifestLink.href = manifestUrl;
| // Main application code | |
| document.addEventListener('DOMContentLoaded', function() { | |
// Element references, looked up once by id.
const byId = (id) => document.getElementById(id);

const fileInput = byId('fileInput');
const browseBtn = byId('browseBtn');
const dropzone = byId('dropzone');
const markdownOutput = byId('markdownOutput');
const copyBtn = byId('copyBtn');
const downloadBtn = byId('downloadBtn');
const progressContainer = byId('progressContainer');
const progressBar = byId('progressBar');
const progressPercent = byId('progressPercent');
const progressText = byId('progressText');
const documentPreview = byId('documentPreview');
const previewContent = byId('previewContent');
const fileInfoContainer = byId('fileInfoContainer');
const fileName = byId('fileName');
const fileType = byId('fileType');
const fileIcon = byId('fileIcon');

// Conversion state shared by the handlers and converters below.
let currentMarkdown = '';   // last generated Markdown
let currentFilename = '';   // base name used for the download
let currentFileType = '';   // 'pdf' | 'epub' | 'docx' | 'pptx'
// Preview nodes collected per format before being attached to the page.
let pdfPages = [];
let epubItems = [];
let docxPages = [];
let pptxSlides = [];
// Drag-and-drop wiring for the dropzone.
// Listeners are attached in table order, so on `drop` the sequence is:
// preventDefaults -> unhighlight -> handleDrop.
const dropzoneListeners = [
  [['dragenter', 'dragover', 'dragleave', 'drop'], preventDefaults],
  [['dragenter', 'dragover'], highlight],
  [['dragleave', 'drop'], unhighlight],
  [['drop'], handleDrop]
];
for (const [eventNames, listener] of dropzoneListeners) {
  for (const eventName of eventNames) {
    dropzone.addEventListener(eventName, listener, false);
  }
}

// Stops the browser's own handling of the drag (e.g. opening the file).
function preventDefaults(e) {
  e.preventDefault();
  e.stopPropagation();
}

// Marks the dropzone while a file is dragged over it.
function highlight() {
  dropzone.classList.add('active');
}

// Clears the drag-over marker.
function unhighlight() {
  dropzone.classList.remove('active');
}
// Open the native file picker from the visible button.
browseBtn.addEventListener('click', function(e) {
  e.preventDefault();
  // Clear the previous selection first: a file input fires no `change` event
  // when the same file is picked again, which would make it impossible to
  // re-convert a document (for example after changing the options).
  fileInput.value = '';
  fileInput.click();
});

// Convert whatever the user picked.
fileInput.addEventListener('change', function(e) {
  handleFiles(e);
});
// Drop handler: forwards the first dropped file to handleFiles, wrapped in the
// same `{ target: { files } }` shape a file-input change event provides.
function handleDrop(e) {
  const droppedFile = e.dataTransfer.files[0];
  if (!droppedFile) {
    return;
  }
  handleFiles({ target: { files: [droppedFile] } });
}
// Copy the generated Markdown to the clipboard and flash a confirmation.
//
// The idle label is captured once, up front. Capturing it inside the click
// handler meant a second click within the 2s window saved the "COPIED!" markup
// as the "original", leaving the button stuck on that label.
const copyBtnIdleMarkup = copyBtn.innerHTML;
let copyBtnResetTimer = null;

copyBtn.addEventListener('click', () => {
  // The async Clipboard API is absent on insecure origins and older browsers.
  if (!navigator.clipboard || typeof navigator.clipboard.writeText !== 'function') {
    console.error('Clipboard API is not available in this context.');
    return;
  }
  navigator.clipboard.writeText(currentMarkdown).then(() => {
    copyBtn.innerHTML = '<i class="fas fa-check mr-1"></i> COPIED!';
    // Restart the countdown so repeated clicks do not restore the label early.
    clearTimeout(copyBtnResetTimer);
    copyBtnResetTimer = setTimeout(() => {
      copyBtn.innerHTML = copyBtnIdleMarkup;
    }, 2000);
  }).catch((error) => {
    // For example permission denied or the document not being focused.
    console.error('Copy to clipboard failed:', error);
  });
});
// Save the current Markdown as "<original name>.md" through a temporary link.
downloadBtn.addEventListener('click', () => {
  const markdownBlob = new Blob([currentMarkdown], { type: 'text/markdown' });
  const objectUrl = URL.createObjectURL(markdownBlob);

  const link = document.createElement('a');
  link.href = objectUrl;
  link.download = `${currentFilename}.md`;

  // The link is attached only for the duration of the synthetic click.
  document.body.appendChild(link);
  link.click();
  document.body.removeChild(link);

  URL.revokeObjectURL(objectUrl);
});
/**
 * Entry point for a newly chosen document (file picker or drop).
 *
 * Validates the file type, resets the conversion state, updates the file-info
 * panel and starts the conversion.
 *
 * Changes from the previous version:
 *  - The extension is compared case-insensitively, so "REPORT.PDF" is accepted.
 *  - The type is validated BEFORE any state is cleared, so an unsupported file
 *    no longer wipes the result that is currently displayed.
 *
 * @param {{target: {files: ArrayLike<File>}}} e change event, or the
 *        equivalent object built by handleDrop.
 */
function handleFiles(e) {
  const file = e.target.files[0];
  if (!file) return;

  // Supported extensions -> presentation details.
  const supportedTypes = new Map([
    ['pdf', { icon: 'file-icon fas fa-file-pdf', label: 'PDF Document' }],
    ['epub', { icon: 'file-icon fas fa-book-open', label: 'EPUB eBook' }],
    ['docx', { icon: 'file-icon fas fa-file-word', label: 'Word Document' }],
    ['pptx', { icon: 'file-icon fas fa-file-powerpoint', label: 'PowerPoint Presentation' }]
  ]);

  // Text after the last dot, lower-cased; empty when the name has no dot.
  const dotIndex = file.name.lastIndexOf('.');
  const extension = dotIndex === -1 ? '' : file.name.slice(dotIndex + 1).toLowerCase();
  const typeInfo = supportedTypes.get(extension);
  if (!typeInfo) {
    alert('Unsupported file type. Please upload a PDF, EPUB, DOCX or PPTX file.');
    return;
  }

  // Reset state from any previous conversion.
  currentFilename = file.name.replace(/\.[^/.]+$/, "") || 'converted';
  currentMarkdown = '';
  markdownOutput.textContent = '';
  previewContent.innerHTML = '';
  pdfPages = [];
  epubItems = [];
  docxPages = [];
  pptxSlides = [];

  // Record the detected type and show it.
  currentFileType = extension;
  fileIcon.className = typeInfo.icon;
  fileType.textContent = typeInfo.label;

  // Show file info
  fileName.textContent = file.name;
  fileInfoContainer.classList.remove('hidden');

  // Process the file (asynchronous; errors are reported by processFile itself)
  processFile(file);
}
/**
 * Runs the converter matching `currentFileType` and drives the progress UI.
 *
 * The COPY / DOWNLOAD buttons are hidden while a conversion is running and
 * stay hidden if it fails. Previously buttons left over from an earlier
 * conversion remained clickable and acted on an emptied `currentMarkdown`
 * (for example downloading an empty file).
 *
 * Errors are not rethrown: they are logged and shown in the output pane.
 *
 * @param {File} file the document to convert
 * @returns {Promise<void>}
 */
async function processFile(file) {
  try {
    // Show progress
    progressContainer.classList.remove('hidden');
    progressBar.style.width = '0%';
    progressPercent.textContent = '0%';
    progressText.textContent = 'PROCESSING DOCUMENT...';

    // No result is available yet, so hide the result actions.
    copyBtn.classList.add('hidden');
    downloadBtn.classList.add('hidden');

    // Clear previous content
    markdownOutput.textContent = 'LOADING...';
    previewContent.innerHTML = '';

    // Dispatch to appropriate converter
    switch (currentFileType) {
      case 'pdf':
        await convertPdfToMarkdown(file);
        break;
      case 'epub':
        await convertEpubToMarkdown(file);
        break;
      case 'docx':
        await convertDocxToMarkdown(file);
        break;
      case 'pptx':
        await convertPptxToMarkdown(file);
        break;
      default:
        // Should be unreachable; fail loudly rather than report success.
        throw new Error(`Unsupported file type: ${currentFileType}`);
    }

    // Show action buttons
    copyBtn.classList.remove('hidden');
    downloadBtn.classList.remove('hidden');

    // Update progress
    progressBar.style.width = '100%';
    progressPercent.textContent = '100%';
    progressText.textContent = 'CONVERSION COMPLETE!';
  } catch (error) {
    console.error(`Error converting ${currentFileType}:`, error);
    markdownOutput.textContent = `ERROR: ${error.message}`;
    progressText.textContent = 'CONVERSION FAILED';
  }
}
/**
 * Converts a PDF to Markdown and renders a canvas preview of every page.
 *
 * Each page contributes a "# PAGE n" heading followed by its text; pages are
 * separated by a horizontal rule. The result is stored in `currentMarkdown`
 * and shown in the output pane.
 *
 * Text items are joined with a line break where PDF.js marks the item as
 * ending a line (`hasEOL`) and with a space otherwise. Joining everything with
 * spaces collapsed each page into one long line, which discarded the layout
 * and gave heading detection nothing to work with. When `hasEOL` is absent the
 * output is the same as the plain space-join.
 *
 * @param {File} file the PDF document
 * @returns {Promise<void>}
 */
async function convertPdfToMarkdown(file) {
  const arrayBuffer = await file.arrayBuffer();
  const loadingTask = pdfjsLib.getDocument(arrayBuffer);
  const pdf = await loadingTask.promise;
  let markdownContent = '';
  const totalPages = pdf.numPages;

  // Process each page
  for (let i = 1; i <= totalPages; i++) {
    const page = await pdf.getPage(i);

    // Update progress
    const progress = Math.round((i / totalPages) * 100);
    progressBar.style.width = `${progress}%`;
    progressPercent.textContent = `${progress}%`;
    progressText.textContent = `PROCESSING PAGE ${i} OF ${totalPages}...`;

    // Get text content, keeping line breaks reported by PDF.js.
    const textContent = await page.getTextContent();
    const lastIndex = textContent.items.length - 1;
    const pageText = textContent.items
      .map((item, index) => {
        if (index === lastIndex) return item.str;
        return item.str + (item.hasEOL ? '\n' : ' ');
      })
      .join('');

    // Add page separator if not first page
    if (i > 1) {
      markdownContent += `\n\n---\n\n`;
    }
    // Add page number
    markdownContent += `# PAGE ${i}\n\n`;
    // Add the text content
    markdownContent += pageText;

    // Render PDF preview
    const viewport = page.getViewport({ scale: 0.8 });
    const canvas = document.createElement('canvas');
    const context = canvas.getContext('2d');
    canvas.height = viewport.height;
    canvas.width = viewport.width;
    canvas.className = 'page-canvas';
    await page.render({
      canvasContext: context,
      viewport: viewport
    }).promise;
    pdfPages.push(canvas);
  }

  // Display preview
  pdfPages.forEach(page => {
    previewContent.appendChild(page);
  });

  // Post-process the markdown
  currentMarkdown = postProcessMarkdown(markdownContent);
  markdownOutput.textContent = currentMarkdown;
}
/**
 * Converts an EPUB to Markdown and builds a text preview per spine item.
 *
 * Steps: locate the package document through META-INF/container.xml,
 * optionally emit title/author/date/publisher metadata, then walk the spine
 * in reading order and convert each content document.
 *
 * Changes from the previous version:
 *  - Content is emitted in document order. Before, every heading of a chapter
 *    was written first, then every paragraph, then every list, which
 *    scrambled the text.
 *  - Nested list content is no longer emitted more than once.
 *  - A missing container.xml, rootfile declaration or package document now
 *    raises a descriptive Error instead of a null-dereference TypeError.
 *
 * @param {File} file the EPUB document
 * @returns {Promise<void>}
 * @throws {Error} when the archive lacks the required EPUB structure
 */
async function convertEpubToMarkdown(file) {
  const arrayBuffer = await file.arrayBuffer();
  const zip = await JSZip.loadAsync(arrayBuffer);
  let markdownContent = '';

  // Get container.xml to find the rootfile
  const containerFile = zip.file('META-INF/container.xml');
  if (!containerFile) {
    throw new Error('Invalid EPUB: META-INF/container.xml is missing');
  }
  const containerData = await containerFile.async('text');
  const rootFileMatch = containerData.match(/<rootfile[^>]*full-path="([^"]*)"/);
  if (!rootFileMatch) {
    throw new Error('Invalid EPUB: container.xml does not declare a rootfile');
  }
  const rootFilePath = rootFileMatch[1];

  // Parse the rootfile (usually content.opf)
  const rootFile = zip.file(rootFilePath);
  if (!rootFile) {
    throw new Error(`Invalid EPUB: package document "${rootFilePath}" is missing`);
  }
  const rootFileData = await rootFile.async('text');
  const parser = new DOMParser();
  const opfDoc = parser.parseFromString(rootFileData, 'application/xml');

  // Extract metadata if enabled
  if (document.getElementById('includeMetadata').checked) {
    const metadata = opfDoc.querySelector('metadata');
    if (metadata) {
      markdownContent += '# EPUB METADATA\n\n';
      const fields = [
        ['title', 'TITLE'],
        ['creator', 'AUTHOR'],
        ['date', 'DATE'],
        ['publisher', 'PUBLISHER']
      ];
      for (const [selector, label] of fields) {
        const value = metadata.querySelector(selector)?.textContent;
        if (value) markdownContent += `**${label}:** ${value}\n\n`;
      }
      markdownContent += '---\n\n';
    }
  }

  // Manifest: item id -> href. A Map avoids accidental hits on
  // Object.prototype keys for unusual ids.
  const hrefById = new Map();
  opfDoc.querySelectorAll('manifest item').forEach(item => {
    hrefById.set(item.getAttribute('id'), item.getAttribute('href'));
  });

  // Get the spine (reading order)
  const spineItems = opfDoc.querySelectorAll('spine itemref');
  const totalItems = spineItems.length;
  let processedItems = 0;

  // Directory of the package document; manifest hrefs are relative to it.
  const contentPath = rootFilePath.split('/').slice(0, -1).join('/');

  // Process each spine item
  for (const item of spineItems) {
    const href = hrefById.get(item.getAttribute('idref'));
    if (!href) continue;
    const fullPath = contentPath ? `${contentPath}/${href}` : href;

    // Update progress
    processedItems++;
    const progress = Math.round((processedItems / totalItems) * 100);
    progressBar.style.width = `${progress}%`;
    progressPercent.textContent = `${progress}%`;
    progressText.textContent = `PROCESSING ITEM ${processedItems} OF ${totalItems}...`;

    // Read the content file
    const contentFile = zip.file(fullPath);
    if (!contentFile) continue;
    const content = await contentFile.async('text');

    // Parse HTML content
    const contentDoc = parser.parseFromString(content, 'text/html');
    // Remove scripts and styles
    contentDoc.querySelectorAll('script, style').forEach(el => el.remove());

    // Convert to text with basic formatting, in document order
    const itemContent = epubContentToMarkdown(contentDoc);

    // Add to markdown content
    markdownContent += itemContent;
    // Add separator if not last item
    if (processedItems < totalItems) {
      markdownContent += '\n---\n\n';
    }

    // Create preview element
    const previewDiv = document.createElement('div');
    previewDiv.className = 'preview-slide';
    const previewTitle = document.createElement('h4');
    previewTitle.className = 'font-bold mb-2';
    previewTitle.textContent = href;
    const previewContentDiv = document.createElement('div');
    previewContentDiv.className = 'text-sm';
    previewContentDiv.textContent = itemContent.substring(0, 500) + (itemContent.length > 500 ? '...' : '');
    previewDiv.appendChild(previewTitle);
    previewDiv.appendChild(previewContentDiv);
    epubItems.push(previewDiv);
  }

  // Display preview
  epubItems.forEach(item => {
    previewContent.appendChild(item);
  });

  // Post-process the markdown
  currentMarkdown = postProcessMarkdown(markdownContent);
  markdownOutput.textContent = currentMarkdown;
}

/**
 * Renders the headings, paragraphs and lists of one EPUB content document as
 * Markdown, preserving their order in the document.
 *
 * A single selector query returns matches in document order. Blocks that sit
 * inside a list item (paragraphs, nested lists) are skipped because their
 * text is already part of the enclosing item's textContent.
 *
 * @param {Document} contentDoc parsed content document
 * @returns {string} Markdown for that document
 */
function epubContentToMarkdown(contentDoc) {
  let output = '';
  contentDoc.querySelectorAll('h1, h2, h3, h4, h5, h6, p, ul, ol').forEach(block => {
    if (block.closest('li')) return;

    const tag = block.tagName.toUpperCase();
    if (tag === 'UL' || tag === 'OL') {
      let position = 0;
      Array.from(block.children).forEach(child => {
        if (child.tagName.toUpperCase() !== 'LI') return;
        position++;
        const prefix = tag === 'OL' ? `${position}.` : '-';
        output += `${prefix} ${child.textContent}\n`;
      });
      output += '\n';
    } else if (tag === 'P') {
      output += `${block.textContent}\n\n`;
    } else {
      // H1..H6: the digit in the tag name is the heading level.
      const level = parseInt(tag.substring(1), 10);
      output += `${'#'.repeat(level)} ${block.textContent}\n\n`;
    }
  });
  return output;
}
/**
 * Extracts the raw text of a DOCX with mammoth, builds preview cards for
 * simulated "pages" of 2000 characters each, and publishes the post-processed
 * text as the Markdown result.
 *
 * @param {File} file the DOCX document
 * @returns {Promise<void>}
 */
async function convertDocxToMarkdown(file) {
  const CHARS_PER_PAGE = 2000; // DOCX has no real pages; this is an approximation
  const PREVIEW_CHARS = 500;

  const docxBuffer = await file.arrayBuffer();
  const extraction = await mammoth.extractRawText({ arrayBuffer: docxBuffer });
  const markdownContent = extraction.value;

  const pageCount = Math.ceil(markdownContent.length / CHARS_PER_PAGE);

  // One preview card per simulated page.
  let pageIndex = 0;
  while (pageIndex < pageCount) {
    const offset = pageIndex * CHARS_PER_PAGE;
    const pageText = markdownContent.substring(offset, offset + CHARS_PER_PAGE);

    const heading = document.createElement('h4');
    heading.className = 'font-bold mb-2';
    heading.textContent = `Page ${pageIndex + 1}`;

    const excerpt = document.createElement('div');
    excerpt.className = 'text-sm';
    const ellipsis = pageText.length > PREVIEW_CHARS ? '...' : '';
    excerpt.textContent = pageText.substring(0, PREVIEW_CHARS) + ellipsis;

    const card = document.createElement('div');
    card.className = 'preview-slide';
    card.appendChild(heading);
    card.appendChild(excerpt);
    docxPages.push(card);

    // Progress reflects pages completed before this one.
    const percent = Math.round((pageIndex / pageCount) * 100);
    progressBar.style.width = `${percent}%`;
    progressPercent.textContent = `${percent}%`;
    progressText.textContent = `PROCESSING DOCUMENT...`;

    pageIndex += 1;
  }

  // Attach every collected card to the preview pane.
  for (const card of docxPages) {
    previewContent.appendChild(card);
  }

  // Publish the result.
  currentMarkdown = postProcessMarkdown(markdownContent);
  markdownOutput.textContent = currentMarkdown;
}
/**
 * Converts a PPTX to Markdown and builds a preview card per slide.
 *
 * Uses the `pptx2md` global when its script actually loaded. Otherwise it
 * falls back to reading the slide XML directly with JSZip (already loaded for
 * EPUB support), so PPTX conversion no longer depends on one external script
 * being reachable.
 *
 * @param {File} file the PPTX document
 * @returns {Promise<void>}
 * @throws {Error} from the fallback when the archive contains no slides
 */
async function convertPptxToMarkdown(file) {
  const arrayBuffer = await file.arrayBuffer();

  const result = typeof pptx2md === 'function'
    ? await pptx2md(arrayBuffer)
    : await extractPptxWithJsZip(arrayBuffer);

  // Get the markdown content
  const markdownContent = result.markdown;
  // Tolerate a result without a slide list: no previews rather than a crash.
  const slides = Array.isArray(result.slides) ? result.slides : [];

  // Create preview elements for each slide
  slides.forEach((slide, index) => {
    const previewDiv = document.createElement('div');
    previewDiv.className = 'preview-slide';
    const previewTitle = document.createElement('h4');
    previewTitle.className = 'font-bold mb-2';
    previewTitle.textContent = `Slide ${index + 1}`;
    const previewContentDiv = document.createElement('div');
    previewContentDiv.className = 'text-sm';

    // Create a simplified preview of the slide content
    let previewText = '';
    if (slide.title) previewText += `# ${slide.title}\n\n`;
    if (slide.notes) previewText += `${slide.notes}\n\n`;
    if (slide.bodies && slide.bodies.length > 0) {
      previewText += slide.bodies.map(body => body.text).join('\n\n');
    }
    previewContentDiv.textContent = previewText.substring(0, 500) + (previewText.length > 500 ? '...' : '');
    previewDiv.appendChild(previewTitle);
    previewDiv.appendChild(previewContentDiv);
    pptxSlides.push(previewDiv);

    // Update progress
    const progress = Math.round((index / slides.length) * 100);
    progressBar.style.width = `${progress}%`;
    progressPercent.textContent = `${progress}%`;
    progressText.textContent = `PROCESSING SLIDE ${index + 1} OF ${slides.length}...`;
  });

  // Display preview
  pptxSlides.forEach(slide => {
    previewContent.appendChild(slide);
  });

  // Post-process the markdown
  currentMarkdown = postProcessMarkdown(markdownContent);
  markdownOutput.textContent = currentMarkdown;
}

/**
 * Fallback PPTX reader: extracts slide text straight from the archive.
 *
 * Reads every `ppt/slides/slideN.xml` part, collects the text runs of each
 * shape paragraph by paragraph, and treats the first shape whose placeholder
 * type is `title` or `ctrTitle` as the slide title. Returns the same
 * `{ markdown, slides }` shape that convertPptxToMarkdown consumes.
 *
 * Limitations: slides are ordered by the number in their part name, which
 * approximates but is not guaranteed to equal presentation order. Tables,
 * speaker notes and images are not extracted.
 *
 * @param {ArrayBuffer} arrayBuffer raw PPTX bytes
 * @returns {Promise<{markdown: string, slides: Array<{title: string, notes: string, bodies: Array<{text: string}>}>}>}
 * @throws {Error} when the archive contains no slide parts
 */
async function extractPptxWithJsZip(arrayBuffer) {
  const zip = await JSZip.loadAsync(arrayBuffer);
  const slideNumber = (path) => parseInt(path.match(/slide(\d+)\.xml$/)[1], 10);
  const slidePaths = Object.keys(zip.files)
    .filter((path) => /^ppt\/slides\/slide\d+\.xml$/.test(path))
    .sort((a, b) => slideNumber(a) - slideNumber(b));
  if (slidePaths.length === 0) {
    throw new Error('No slides found in PPTX file');
  }

  const parser = new DOMParser();
  const slides = [];
  for (const path of slidePaths) {
    const xml = await zip.file(path).async('text');
    const slideDoc = parser.parseFromString(xml, 'application/xml');
    const slide = { title: '', notes: '', bodies: [] };

    // Elements are matched by local name so namespace prefixes do not matter.
    Array.from(slideDoc.getElementsByTagNameNS('*', 'sp')).forEach((shape) => {
      const text = Array.from(shape.getElementsByTagNameNS('*', 'p'))
        .map((paragraph) => Array.from(paragraph.getElementsByTagNameNS('*', 't'))
          .map((run) => run.textContent)
          .join(''))
        .filter((line) => line.trim() !== '')
        .join('\n');
      if (!text) return;

      const placeholder = shape.getElementsByTagNameNS('*', 'ph')[0];
      const placeholderType = placeholder ? placeholder.getAttribute('type') : null;
      const isTitle = placeholderType === 'title' || placeholderType === 'ctrTitle';
      if (isTitle && !slide.title) {
        // Keep the title on one line so it stays a valid Markdown heading.
        slide.title = text.replace(/\n/g, ' ');
      } else {
        slide.bodies.push({ text });
      }
    });
    slides.push(slide);
  }

  const markdown = slides
    .map((slide, index) => {
      const parts = [`# ${slide.title || `Slide ${index + 1}`}`];
      slide.bodies.forEach((body) => parts.push(body.text));
      return parts.join('\n\n');
    })
    .join('\n\n---\n\n');

  return { markdown, slides };
}
/**
 * Applies the optional clean-up passes selected in the options panel.
 *
 * - AUTO-DETECT HEADINGS: a line that follows a blank line, starts with an
 *   uppercase ASCII letter and is 11-80 characters long is promoted to a
 *   level-2 heading.
 * - PRESERVE LAYOUT STRUCTURE: runs of three or more newlines are collapsed
 *   to a single blank line.
 *
 * Fixes over the previous version:
 *  - The old pattern captured the leading "\n\n" inside `$1`, so the
 *    replacement produced an empty "## " line followed by the unchanged text
 *    instead of a heading.
 *  - The old pattern had no upper length bound, so entire paragraphs (or a
 *    whole PDF page) qualified as a "heading".
 *  - The trailing newline is now only looked at, not consumed, so two
 *    candidate lines separated by one blank line are both detected.
 *  - null/undefined input yields '' instead of throwing.
 *
 * @param {string} text raw converter output
 * @returns {string} cleaned Markdown
 */
function postProcessMarkdown(text) {
  let result = text == null ? '' : String(text);

  // Heuristic heading detection (text-only; no font information is available).
  if (document.getElementById('detectHeadings').checked) {
    result = result.replace(/(\n\n)([A-Z][^\n]{10,79})(?=\n)/g, '$1## $2');
  }

  // Normalize paragraph spacing.
  if (document.getElementById('preserveLayout').checked) {
    result = result.replace(/\n{3,}/g, '\n\n');
  }

  return result;
}
| }); | |
| </script> | |
| <!-- Inline Service Worker --> | |
| <script> | |
// Builds a service worker from an in-page source string and tries to register
// it through a blob: URL.
// NOTE(review): ServiceWorkerContainer.register() is specified to accept only
// http(s) script URLs, so this attempt is expected to end in the failure
// branch below. Confirm, and consider shipping this source as a real /sw.js.
if ('serviceWorker' in navigator) {
  // Worker source: pre-caches the app shell on install, serves cache-first on
  // fetch, and deletes caches with any other name on activate.
  const swCode = `
const CACHE_NAME = 'doc-converter-v3';
const ASSETS_TO_CACHE = [
'/',
'/index.html',
'https://cdn.tailwindcss.com',
'https://cdnjs.cloudflare.com/ajax/libs/pdf.js/2.12.313/pdf.min.js',
'https://cdnjs.cloudflare.com/ajax/libs/pdf.js/2.12.313/pdf.worker.min.js',
'https://cdnjs.cloudflare.com/ajax/libs/showdown/2.1.0/showdown.min.js',
'https://cdn.jsdelivr.net/npm/jszip@3.10.1/dist/jszip.min.js',
'https://cdnjs.cloudflare.com/ajax/libs/mammoth/1.4.0/mammoth.browser.min.js',
'https://cdn.jsdelivr.net/npm/pptx2md@1.4.1/dist/pptx2md.min.js',
'https://kit.fontawesome.com/a076d05399.js',
'https://fonts.googleapis.com/css2?family=Space+Grotesk:wght@400;500;700&display=swap'
];
self.addEventListener('install', (event) => {
event.waitUntil(
caches.open(CACHE_NAME)
.then((cache) => {
return cache.addAll(ASSETS_TO_CACHE);
})
);
});
self.addEventListener('fetch', (event) => {
event.respondWith(
caches.match(event.request)
.then((response) => {
return response || fetch(event.request);
})
);
});
self.addEventListener('activate', (event) => {
const cacheWhitelist = [CACHE_NAME];
event.waitUntil(
caches.keys().then((cacheNames) => {
return Promise.all(
cacheNames.map((cacheName) => {
if (cacheWhitelist.indexOf(cacheName) === -1) {
return caches.delete(cacheName);
}
})
);
})
);
});
`;

  const workerBlob = new Blob([swCode], { type: 'application/javascript' });
  const swUrl = URL.createObjectURL(workerBlob);

  const reportRegistered = (registration) => {
    console.log('Service Worker registered with scope:', registration.scope);
  };
  const reportFailure = (error) => {
    console.log('Service Worker registration failed:', error);
  };

  navigator.serviceWorker.register(swUrl).then(reportRegistered).catch(reportFailure);
}
| </script> | |
<!-- Attribution badge. Links that open a new tab carry rel="noopener noreferrer" so the opened page gets no handle on this window; the logo declares its size to avoid layout shift. -->
<p style="border-radius: 8px; text-align: center; font-size: 12px; color: #fff; margin-top: 16px;position: fixed; left: 8px; bottom: 8px; z-index: 10; background: rgba(0, 0, 0, 0.8); padding: 4px 8px;">Made with <img src="https://enzostvs-deepsite.hf.space/logo.svg" alt="DeepSite Logo" width="16" height="16" style="width: 16px; height: 16px; vertical-align: middle;display:inline-block;margin-right:3px;filter:brightness(0) invert(1);"><a href="https://enzostvs-deepsite.hf.space" style="color: #fff;text-decoration: underline;" target="_blank" rel="noopener noreferrer">DeepSite</a> - 🧬 <a href="https://enzostvs-deepsite.hf.space?remix=ihansel/documentconversion" style="color: #fff;text-decoration: underline;" target="_blank" rel="noopener noreferrer">Remix</a></p></body>
| </html> |