| | import React, { useState } from "react"; |
| | import { motion, AnimatePresence } from "framer-motion"; |
| | import { |
| | Download, |
| | Braces, |
| | FileCode2, |
| | Check, |
| | Share2, |
| | FileText, |
| | Link2, |
| | Mail, |
| | } from "lucide-react"; |
| | import { Button } from "@/components/ui/button"; |
| | import { |
| | DropdownMenu, |
| | DropdownMenuContent, |
| | DropdownMenuItem, |
| | DropdownMenuSeparator, |
| | DropdownMenuTrigger, |
| | } from "@/components/ui/dropdown-menu"; |
| | import { cn } from "@/lib/utils"; |
| | import ShareModal from "@/components/ShareModal"; |
| | import ShareLinkModal from "@/components/ShareLinkModal"; |
| | import { shareExtraction, createShareLink } from "@/services/api"; |
| |
|
| | |
/**
 * Reshape an extraction result into the structure used by the JSON/XML exports.
 *
 * The input is never modified: the top level is shallow-copied and every nested
 * page object that needs changing is copied before it is touched.
 *
 * Transformations, in order:
 *  1. `Fields` is lifted out so it can be emitted as the first key of the result.
 *  2. When `pages` is a non-empty array, the top-level `full_text` is dropped and
 *     `full_text` is removed from each page's `fields`.
 *  3. For the "json" and "xml" formats, `pages` is replaced by one `page_<n>` key
 *     per page (`n` is `page.page_number`, falling back to the 1-based index).
 *     Page fields that merely repeat an identical top-level value are omitted.
 *  4. `Fields` and `metadata` are stripped from every `page_*` object.
 *
 * @param {*} fields - Extraction payload; non-object values are returned as-is.
 * @param {string} [format="json"] - Target format; only "json" and "xml" flatten `pages`.
 * @returns {*} A new object ready for serialization, or `fields` itself when it is not an object.
 */
function prepareFieldsForOutput(fields, format = "json") {
  if (!fields || typeof fields !== "object") {
    return fields;
  }

  // Shallow copy without `Fields`; nested values are still shared with the caller.
  const { Fields: rootFields, ...output } = fields;
  const hasPagesArray = Array.isArray(output.pages);

  if (hasPagesArray && output.pages.length > 0) {
    // Per-page text makes the document-level full_text redundant.
    delete output.full_text;

    output.pages = output.pages.map((page) => {
      const pageCopy = { ...page };
      if (pageCopy.fields && typeof pageCopy.fields === "object") {
        const fieldsCopy = { ...pageCopy.fields };
        delete fieldsCopy.full_text;
        pageCopy.fields = fieldsCopy;
      }
      return pageCopy;
    });
  }

  if ((format === "json" || format === "xml") && hasPagesArray) {
    // Snapshot of the document-level keys, taken before any page_<n> key is added.
    const topLevelKeys = new Set(
      Object.keys(output).filter((key) => key !== "pages" && key !== "full_text"),
    );

    output.pages.forEach((page, idx) => {
      const pageNum = page.page_number || idx + 1;

      const pageObj = {
        text: page.text || "",
        confidence: page.confidence || 0,
        doc_type: page.doc_type || "other",
      };

      if (Array.isArray(page.table) && page.table.length > 0) {
        pageObj.table = page.table;
      }
      if (Array.isArray(page.footer_notes) && page.footer_notes.length > 0) {
        pageObj.footer_notes = page.footer_notes;
      }

      // Keep a page field only when it adds information over the top level.
      const pageFields = Object.fromEntries(
        Object.entries(page.fields || {}).filter(
          ([key, value]) =>
            key !== "full_text" && (!topLevelKeys.has(key) || value !== output[key]),
        ),
      );
      if (Object.keys(pageFields).length > 0) {
        pageObj.fields = pageFields;
      }

      output[`page_${pageNum}`] = pageObj;
    });

    delete output.pages;
  }

  // Strip Fields/metadata from page objects. Pre-existing page_* values are the
  // caller's own objects, so work on a copy instead of deleting in place.
  for (const key of Object.keys(output)) {
    if (!key.startsWith("page_")) continue;
    const pageData = output[key];
    if (!pageData || typeof pageData !== "object" || Array.isArray(pageData)) continue;

    const cleanedPage = { ...pageData };
    delete cleanedPage.Fields;
    delete cleanedPage.metadata;
    output[key] = cleanedPage; // re-assigning an existing key keeps its position
  }

  const hasRootFields =
    rootFields && typeof rootFields === "object" && Object.keys(rootFields).length > 0;

  // `Fields` goes first so it leads the serialized output.
  return hasRootFields ? { Fields: rootFields, ...output } : output;
}
| |
|
// Names of the five predefined XML entities, keyed by the character they stand for.
const XML_ENTITY_NAMES = Object.freeze({
  "&": "amp",
  "<": "lt",
  ">": "gt",
  '"': "quot",
  "'": "apos",
});

/**
 * Escape a value for use as XML text or attribute content.
 *
 * Each of the five reserved characters (ampersand, angle brackets, double and
 * single quote) is replaced by its predefined entity reference. The replacement
 * is done in a single pass, so an ampersand that is already part of an entity
 * is escaped exactly once.
 *
 * @param {*} str - Value to escape; coerced with `String()`. `null`/`undefined` yield "".
 * @returns {string} The escaped text.
 */
function escapeXML(str) {
  if (str === null || str === undefined) {
    return "";
  }
  return String(str).replace(/[&<>"']/g, (ch) => `&${XML_ENTITY_NAMES[ch]};`);
}
| |
|
/**
 * Serialize an extraction result as an XML document.
 *
 * The object is first normalized with `prepareFieldsForOutput(obj, "xml")`, then
 * walked recursively:
 *  - `null`/`undefined` values (and array items) are skipped;
 *  - arrays repeat the element once per item;
 *  - nested objects become nested elements;
 *  - everything else is written as escaped text.
 *
 * Keys are turned into valid element names: characters that are not letters,
 * digits, `_`, `.` or `-` become `_`, and a name that does not start with a
 * letter or `_` is prefixed with `_`. Keys that are already valid are unchanged.
 *
 * @param {object} obj - Extraction payload to serialize.
 * @param {string} [rootName="extraction"] - Name of the document element (used verbatim).
 * @returns {string} The XML document, without a trailing newline.
 */
function objectToXML(obj, rootName = "extraction") {
  const prepared = prepareFieldsForOutput(obj, "xml");
  const lines = ['<?xml version="1.0" encoding="UTF-8"?>', `<${rootName}>`];

  // Field labels such as "Invoice Number" or "2nd" are not legal element names.
  const toElementName = (key) => {
    const cleaned = String(key).replace(/[^\p{L}\p{N}_.-]/gu, "_");
    return /^[\p{L}_]/u.test(cleaned) ? cleaned : `_${cleaned}`;
  };

  const emit = (node, indent) => {
    for (const [key, value] of Object.entries(node)) {
      if (value === null || value === undefined) continue;

      // full_text duplicates the per-page text when a pages array sits next to it.
      if (key === "full_text" && Array.isArray(node.pages) && node.pages.length > 0) {
        continue;
      }

      const tag = toElementName(key);

      if (Array.isArray(value)) {
        for (const item of value) {
          // typeof null === "object", so null items must be filtered before recursing.
          if (item === null || item === undefined) continue;

          lines.push(`${indent}<${tag}>`);
          if (typeof item === "object") {
            emit(item, `${indent} `);
          } else {
            lines.push(`${indent} ${escapeXML(String(item))}`);
          }
          lines.push(`${indent}</${tag}>`);
        }
      } else if (typeof value === "object") {
        lines.push(`${indent}<${tag}>`);
        emit(value, `${indent} `);
        lines.push(`${indent}</${tag}>`);
      } else {
        lines.push(`${indent}<${tag}>${escapeXML(String(value))}</${tag}>`);
      }
    }
  };

  if (prepared !== null && prepared !== undefined) {
    emit(prepared, " ");
  }

  lines.push(`</${rootName}>`);
  return lines.join("\n");
}
| |
|
| | export default function ExportButtons({ isComplete, extractionResult }) { |
// Format currently being exported ("docx" | "json" | "xml"); null while idle.
const [downloading, setDownloading] = useState(null);
// Visibility of the e-mail share dialog (ShareModal).
const [isShareModalOpen, setIsShareModalOpen] = useState(false);
// Visibility of the share-link dialog (ShareLinkModal).
const [isShareLinkModalOpen, setIsShareLinkModalOpen] = useState(false);
// Link handed to ShareLinkModal; empty until createShareLink succeeds.
const [shareLink, setShareLink] = useState("");
// True while the share-link request is in flight; passed to the modal as isLoading.
const [isGeneratingLink, setIsGeneratingLink] = useState(false);
| |
|
| | |
/**
 * Collect the plain text of an extraction result for the Word export.
 *
 * Sources are tried in this order:
 *  1. `page_<n>` entries — their `text` values are joined with a blank line, in
 *     ascending page number (not key insertion order, so `page_10` never lands
 *     before `page_2`);
 *  2. the document-level `full_text`;
 *  3. the `text` of each entry in a `pages` array.
 *
 * @param {object} fields - Extraction payload.
 * @returns {string} The combined text, or "" when none is available.
 */
const extractTextFromFields = (fields) => {
  if (!fields || typeof fields !== "object") {
    return "";
  }

  const PAGE_PREFIX = "page_";

  // Keys whose suffix is not a number (e.g. "page_count") sort after real pages.
  const pageRank = (key) => {
    const pageNumber = Number.parseInt(key.slice(PAGE_PREFIX.length), 10);
    return Number.isNaN(pageNumber) ? Number.POSITIVE_INFINITY : pageNumber;
  };

  const joinPageTexts = (pages) =>
    pages
      .map((page) => page && page.text)
      .filter(Boolean)
      .join("\n\n");

  const orderedPageKeys = Object.keys(fields)
    .filter((key) => key.startsWith(PAGE_PREFIX))
    .sort((a, b) => {
      const rankA = pageRank(a);
      const rankB = pageRank(b);
      if (rankA === rankB) return 0; // stable sort keeps the original order for ties
      return rankA < rankB ? -1 : 1;
    });

  const textFromPageKeys = joinPageTexts(orderedPageKeys.map((key) => fields[key]));
  if (textFromPageKeys) {
    return textFromPageKeys;
  }

  if (fields.full_text) {
    return fields.full_text;
  }

  if (Array.isArray(fields.pages)) {
    return joinPageTexts(fields.pages);
  }

  return "";
};
| |
|
| | |
/**
 * Escape a value for insertion as HTML text content.
 *
 * Ampersands, angle brackets and non-breaking spaces are replaced by their
 * entity references — the same set a browser escapes when serializing a text
 * node — without creating a DOM element for every call. Quotes are left alone,
 * so the result is meant for element content, not attribute values.
 *
 * @param {*} text - Value to escape; falsy values yield "".
 * @returns {string} The escaped text.
 */
const escapeHtml = (text) => {
  if (!text) return "";

  const entityNames = { "&": "amp", "<": "lt", ">": "gt", "\u00a0": "nbsp" };
  return String(text).replace(/[&<>\u00a0]/g, (ch) => `&${entityNames[ch]};`);
};
| |
|
| | |
/**
 * Replace markdown-style pipe tables in `text` with HTML `<table>` markup.
 *
 * A table starts at a line that contains at least two `|` characters, is not a
 * separator row (only whitespace, `|`, `-`, `:`) and yields two or more header
 * cells. An optional separator row directly below the header is skipped. The
 * table then extends over the following lines until a blank line or a line
 * without `|`; separator rows inside the body are dropped. Cell text is passed
 * through `escapeHtml`. All other lines are copied through unchanged.
 *
 * @param {string} text - Text that may contain pipe tables.
 * @returns {string} The text with tables converted; falsy input is returned as-is.
 */
const convertPipeTablesToHTML = (text) => {
  if (!text) return text;

  const SEPARATOR_ROW = /^[\s|\-:]+$/;
  const isSeparatorRow = (line) => SEPARATOR_ROW.test(line.trim());

  // Split a row into trimmed cells, dropping the empty cell produced by a
  // leading and/or trailing pipe.
  const splitCells = (line) => {
    const cells = line.split("|").map((cell) => cell.trim());
    if (cells.length > 0 && !cells[0]) cells.shift();
    if (cells.length > 0 && !cells[cells.length - 1]) cells.pop();
    return cells;
  };

  // First row is the header, the rest form the body.
  const renderTable = ([headerCells, ...bodyRows]) => {
    const head = headerCells
      .map(
        (cell) =>
          `<th class="border border-gray-300 px-4 py-2 bg-gray-100 font-semibold text-left">${escapeHtml(cell)}</th>`,
      )
      .join("");

    const body = bodyRows
      .map((cells) => {
        const tds = cells
          .map((cell) => `<td class="border border-gray-300 px-4 py-2">${escapeHtml(cell)}</td>`)
          .join("");
        return `<tr>${tds}</tr>\n`;
      })
      .join("");

    return `<table class="border-collapse border border-gray-300 w-full my-4">\n<thead>\n<tr>${head}</tr>\n</thead>\n<tbody>\n${body}</tbody>\n</table>`;
  };

  const lines = text.split("\n");
  const output = [];
  let index = 0;

  while (index < lines.length) {
    const line = lines[index];

    // Two pipes (three segments) are the minimum for a header candidate.
    const looksLikeHeader = line.split("|").length >= 3 && !isSeparatorRow(line);
    const headerCells = looksLikeHeader ? splitCells(line) : [];

    if (headerCells.length < 2) {
      output.push(line);
      index += 1;
      continue;
    }

    const rows = [headerCells];
    let cursor = index + 1;

    // Skip the |---|---| row under the header, if present.
    if (cursor < lines.length && isSeparatorRow(lines[cursor])) {
      cursor += 1;
    }

    while (cursor < lines.length) {
      const rowLine = lines[cursor];
      if (!rowLine.trim() || !rowLine.includes("|")) break;

      if (!isSeparatorRow(rowLine)) {
        rows.push(splitCells(rowLine));
      }
      cursor += 1;
    }

    output.push(renderTable(rows));
    index = cursor;
  }

  return output.join("\n");
};
| |
|
| | |
/**
 * Convert lightly formatted extraction text into HTML for the Word export.
 *
 * Steps:
 *  1. pipe tables are converted with `convertPipeTablesToHTML`;
 *  2. `$^{x}$` / `$^x$` and `$_{x}$` / `$_x$` become `<sup>` / `<sub>`;
 *  3. every `<table>…</table>` is swapped for a placeholder so the inline rules
 *     below cannot alter table markup;
 *  4. `#`, `##`, `###` headings, `**bold**`, `*italic*` and `[text](url)` links
 *     are converted;
 *  5. the remaining text is split on blank lines into paragraphs; single line
 *     breaks inside a paragraph become `<br>`; paragraphs that already start
 *     with a block-level tag are not wrapped in `<p>`;
 *  6. tables are restored and empty paragraphs removed.
 *
 * Text outside generated tables is NOT HTML-escaped: markup already present in
 * the input (for example a ready-made `<table>`) is passed through.
 *
 * @param {string} text - Source text.
 * @returns {string} HTML fragment; "" for falsy input.
 */
const renderMarkdownToHTML = (text) => {
  if (!text) return "";

  let html = convertPipeTablesToHTML(text);

  // LaTeX-style superscripts / subscripts.
  html = html.replace(/\$\s*\^\s*\{([^}]+)\}\s*\$/g, '<sup>$1</sup>');
  html = html.replace(/\$\s*\^\s*([^\s$<>]+)\s*\$/g, '<sup>$1</sup>');
  html = html.replace(/\$\s*_\s*\{([^}]+)\}\s*\$/g, '<sub>$1</sub>');
  html = html.replace(/\$\s*_\s*([^\s$<>]+)\s*\$/g, '<sub>$1</sub>');

  // Park tables behind placeholders while the inline rules run.
  const htmlBlocks = [];
  html = html.replace(/<table[\s\S]*?<\/table>/gi, (match) => {
    htmlBlocks.push(match);
    return `__HTML_BLOCK_${htmlBlocks.length - 1}__`;
  });

  // Headings, most specific first.
  html = html.replace(/^### (.*$)/gm, '<h3>$1</h3>');
  html = html.replace(/^## (.*$)/gm, '<h2>$1</h2>');
  html = html.replace(/^# (.*$)/gm, '<h1>$1</h1>');

  // Bold before italic so `**` is not read as two italics.
  html = html.replace(/\*\*(.*?)\*\*/g, '<strong>$1</strong>');
  html = html.replace(/\*(.*?)\*/g, '<em>$1</em>');

  html = html.replace(/\[([^\]]+)\]\(([^)]+)\)/g, '<a href="$2">$1</a>');

  const BLOCK_TAG_START = /^<(?:h[1-6]|table|p|div|ul|ol|li|blockquote|pre|hr)\b/i;

  // Turn one stretch of text into balanced <p> elements.
  const wrapParagraphs = (segment) =>
    segment
      .split(/\n{2,}/)
      .map((chunk) => {
        // The following character is only looked at, not consumed, so runs of
        // one-character lines ("1\n2\n3") get a <br> at every break. Breaks
        // that touch a tag boundary are left as plain newlines.
        const body = chunk.replace(/([^\n>])\n(?=[^\n<])/g, '$1<br>');
        const trimmed = body.trim();
        if (!trimmed) return "";
        return BLOCK_TAG_START.test(trimmed) ? body : `<p>${body}</p>`;
      })
      .join("");

  // split() with a capture group alternates: text, block index, text, …
  const pieces = html.split(/__HTML_BLOCK_(\d+)__/);
  html = pieces
    .map((piece, position) => {
      if (position % 2 === 0) {
        return wrapParagraphs(piece);
      }
      // An index with no stored block was literal text in the input; keep it.
      return htmlBlocks[Number(piece)] ?? `__HTML_BLOCK_${piece}__`;
    })
    .join("");

  html = html.replace(/<p><\/p>/g, '');
  html = html.replace(/<p>\s*<br>\s*<\/p>/g, '');
  html = html.replace(/<p>\s*<\/p>/g, '');

  return html;
};
| |
|
/**
 * Wrap an HTML fragment in the Word-compatible HTML shell used for the
 * "docx" export (saved with a .doc extension and the msword MIME type).
 *
 * @param {string} bodyHtml - Markup placed inside <body>.
 * @returns {string} Complete HTML document.
 */
const buildWordDocument = (bodyHtml) => `<!DOCTYPE html>
<html xmlns:o="urn:schemas-microsoft-com:office:office" xmlns:w="urn:schemas-microsoft-com:office:word" xmlns="http://www.w3.org/TR/REC-html40">
<head>
<meta charset="UTF-8">
<meta name="ProgId" content="Word.Document">
<meta name="Generator" content="Microsoft Word">
<meta name="Originator" content="Microsoft Word">
<!--[if gte mso 9]><xml>
<w:WordDocument>
<w:View>Print</w:View>
<w:Zoom>100</w:Zoom>
<w:DoNotOptimizeForBrowser/>
</w:WordDocument>
</xml><![endif]-->
<title>Document Extraction</title>
<style>
@page {
size: 8.5in 11in;
margin: 1in;
}
body {
font-family: 'Calibri', 'Arial', sans-serif;
font-size: 11pt;
line-height: 1.6;
margin: 0;
color: #333;
}
h1 {
font-size: 18pt;
font-weight: bold;
color: #0f172a;
margin-top: 24pt;
margin-bottom: 12pt;
page-break-after: avoid;
}
h2 {
font-size: 16pt;
font-weight: 600;
color: #0f172a;
margin-top: 20pt;
margin-bottom: 10pt;
page-break-after: avoid;
}
h3 {
font-size: 14pt;
font-weight: 600;
color: #1e293b;
margin-top: 16pt;
margin-bottom: 8pt;
page-break-after: avoid;
}
p {
margin-top: 6pt;
margin-bottom: 6pt;
}
table {
width: 100%;
border-collapse: collapse;
margin: 12pt 0;
font-size: 10pt;
page-break-inside: avoid;
}
table th {
background-color: #f8fafc;
border: 1pt solid #cbd5e1;
padding: 6pt;
text-align: left;
font-weight: 600;
color: #0f172a;
}
table td {
border: 1pt solid #cbd5e1;
padding: 6pt;
color: #334155;
}
table tr:nth-child(even) {
background-color: #f8fafc;
}
sup {
font-size: 0.75em;
vertical-align: super;
line-height: 0;
}
sub {
font-size: 0.75em;
vertical-align: sub;
line-height: 0;
}
strong {
font-weight: 600;
}
em {
font-style: italic;
}
a {
color: #4f46e5;
text-decoration: underline;
}
</style>
</head>
<body>
${bodyHtml}
</body>
</html>`;

/**
 * Build the requested export in the browser and trigger a file download.
 *
 * Supported formats: "json", "xml" and "docx" (a Word-readable HTML file
 * saved as .doc). Any other value is reported through the error path instead
 * of producing an empty, unnamed file. Failures are logged, not rethrown.
 *
 * @param {string} format - Export format chosen in the menu.
 * @returns {Promise<void>}
 */
const handleDownload = async (format) => {
  if (!extractionResult || !extractionResult.fields) {
    console.error("No extraction data available");
    return;
  }

  // NOTE(review): nothing below awaits, so this state is set and cleared within
  // the same tick; the spinner is unlikely to be painted.
  setDownloading(format);

  try {
    const { fields } = extractionResult;
    // YYYY-MM-DD (UTC), shared by every filename.
    const dateStamp = new Date().toISOString().split('T')[0];

    let content;
    let extension;
    let mimeType;

    if (format === "json") {
      content = JSON.stringify(prepareFieldsForOutput(fields, "json"), null, 2);
      extension = "json";
      mimeType = "application/json";
    } else if (format === "xml") {
      content = objectToXML(fields);
      extension = "xml";
      mimeType = "application/xml";
    } else if (format === "docx") {
      const textContent = extractTextFromFields(fields);
      content = buildWordDocument(renderMarkdownToHTML(textContent));
      extension = "doc";
      mimeType = "application/msword";
    } else {
      throw new Error(`Unsupported export format: ${format}`);
    }

    // Hand the content to the browser through a temporary object URL.
    const blob = new Blob([content], { type: mimeType });
    const url = URL.createObjectURL(blob);
    const link = document.createElement("a");
    link.href = url;
    link.download = `extraction_${dateStamp}.${extension}`;
    document.body.appendChild(link);
    link.click();
    document.body.removeChild(link);
    URL.revokeObjectURL(url);
  } catch (error) {
    console.error("Download error:", error);
  } finally {
    setDownloading(null);
  }
};
| |
|
/**
 * Open the share-link dialog and request a link for the current extraction.
 *
 * Does nothing when the extraction has no id. The dialog is opened right away
 * in its loading state; the link is filled in once `createShareLink` reports
 * success with a `share_link`. Any failure is logged and leaves the link empty.
 *
 * @returns {Promise<void>}
 */
const handleCopyLink = async () => {
  const extractionId = extractionResult?.id;
  if (!extractionId) {
    return;
  }

  // Show the dialog immediately, empty and loading.
  setIsGeneratingLink(true);
  setIsShareLinkModalOpen(true);
  setShareLink("");

  try {
    const response = await createShareLink(extractionId);
    const hasUsableLink = Boolean(response.success && response.share_link);

    if (!hasUsableLink) {
      throw new Error("Failed to generate share link");
    }

    setShareLink(response.share_link);
  } catch (failure) {
    console.error("Failed to create share link:", failure);
    setShareLink("");
  } finally {
    setIsGeneratingLink(false);
  }
};
| |
|
/**
 * Share callback handed to ShareModal: forwards to the `shareExtraction`
 * service and resolves with no value once it completes. Rejections propagate
 * to the caller.
 *
 * @param {*} extractionId - Id of the extraction to share.
 * @param {string} recipientEmail - Address to share with.
 * @returns {Promise<void>}
 */
const handleShare = async (extractionId, recipientEmail) => {
  const pendingShare = shareExtraction(extractionId, recipientEmail);
  await pendingShare;
};
| |
|
| | if (!isComplete) return null; |
| |
|
| | return ( |
| | <motion.div |
| | initial={{ opacity: 0, y: 20 }} |
| | animate={{ opacity: 1, y: 0 }} |
| | className="flex items-center gap-3" |
| | > |
| | {/* Export Options Dropdown */} |
| | <DropdownMenu> |
| | <DropdownMenuTrigger asChild> |
| | <Button |
| | variant="ghost" |
| | className="h-11 w-11 rounded-xl hover:bg-slate-100" |
| | disabled={downloading !== null} |
| | > |
| | {downloading ? ( |
| | <motion.div |
| | animate={{ rotate: 360 }} |
| | transition={{ duration: 1, repeat: Infinity, ease: "linear" }} |
| | > |
| | <Download className="h-4 w-4" /> |
| | </motion.div> |
| | ) : ( |
| | <Share2 className="h-4 w-4" /> |
| | )} |
| | </Button> |
| | </DropdownMenuTrigger> |
| | <DropdownMenuContent align="end" className="w-56 rounded-xl p-2"> |
| | <DropdownMenuItem |
| | className="rounded-lg cursor-pointer" |
| | onClick={() => setIsShareModalOpen(true)} |
| | > |
| | <Mail className="h-4 w-4 mr-2 text-indigo-600" /> |
| | Share output |
| | </DropdownMenuItem> |
| | <DropdownMenuItem |
| | className="rounded-lg cursor-pointer" |
| | onClick={handleCopyLink} |
| | > |
| | <Link2 className="h-4 w-4 mr-2 text-indigo-600" /> |
| | Copy share link |
| | </DropdownMenuItem> |
| | <DropdownMenuSeparator /> |
| | <DropdownMenuItem |
| | className="rounded-lg cursor-pointer" |
| | onClick={() => handleDownload("docx")} |
| | disabled={downloading === "docx"} |
| | > |
| | {downloading === "docx" ? ( |
| | <motion.div |
| | animate={{ rotate: 360 }} |
| | transition={{ duration: 1, repeat: Infinity, ease: "linear" }} |
| | className="h-4 w-4 mr-2" |
| | > |
| | <Download className="h-4 w-4" /> |
| | </motion.div> |
| | ) : ( |
| | <FileText className="h-4 w-4 mr-2 text-blue-600" /> |
| | )} |
| | Download Docx |
| | </DropdownMenuItem> |
| | <DropdownMenuItem |
| | className="rounded-lg cursor-pointer" |
| | onClick={() => handleDownload("json")} |
| | disabled={downloading === "json"} |
| | > |
| | {downloading === "json" ? ( |
| | <motion.div |
| | animate={{ rotate: 360 }} |
| | transition={{ duration: 1, repeat: Infinity, ease: "linear" }} |
| | className="h-4 w-4 mr-2" |
| | > |
| | <Download className="h-4 w-4" /> |
| | </motion.div> |
| | ) : ( |
| | <Braces className="h-4 w-4 mr-2 text-indigo-600" /> |
| | )} |
| | Download JSON |
| | </DropdownMenuItem> |
| | <DropdownMenuItem |
| | className="rounded-lg cursor-pointer" |
| | onClick={() => handleDownload("xml")} |
| | disabled={downloading === "xml"} |
| | > |
| | {downloading === "xml" ? ( |
| | <motion.div |
| | animate={{ rotate: 360 }} |
| | transition={{ duration: 1, repeat: Infinity, ease: "linear" }} |
| | className="h-4 w-4 mr-2" |
| | > |
| | <Download className="h-4 w-4" /> |
| | </motion.div> |
| | ) : ( |
| | <FileCode2 className="h-4 w-4 mr-2 text-slate-600" /> |
| | )} |
| | Download XML |
| | </DropdownMenuItem> |
| | </DropdownMenuContent> |
| | </DropdownMenu> |
| | |
| | {/* Share Modal */} |
| | <ShareModal |
| | isOpen={isShareModalOpen} |
| | onClose={() => setIsShareModalOpen(false)} |
| | onShare={handleShare} |
| | extractionId={extractionResult?.id} |
| | /> |
| | |
| | {/* Share Link Modal */} |
| | <ShareLinkModal |
| | isOpen={isShareLinkModalOpen} |
| | onClose={() => { |
| | setIsShareLinkModalOpen(false); |
| | setShareLink(""); |
| | }} |
| | shareLink={shareLink} |
| | isLoading={isGeneratingLink} |
| | /> |
| | </motion.div> |
| | ); |
| | } |
| |
|