Spaces:
Sleeping
Sleeping
| import React, { useState } from "react"; | |
| import { motion, AnimatePresence } from "framer-motion"; | |
| import { | |
| Download, | |
| Braces, | |
| FileCode2, | |
| Check, | |
| Share2, | |
| FileText, | |
| Link2, | |
| Mail, | |
| } from "lucide-react"; | |
| import { Button } from "@/components/ui/button"; | |
| import { | |
| DropdownMenu, | |
| DropdownMenuContent, | |
| DropdownMenuItem, | |
| DropdownMenuSeparator, | |
| DropdownMenuTrigger, | |
| } from "@/components/ui/dropdown-menu"; | |
| import { cn } from "@/lib/utils"; | |
| import ShareModal from "@/components/ShareModal"; | |
| import ShareLinkModal from "@/components/ShareLinkModal"; | |
| import { shareExtraction, createShareLink } from "@/services/api"; | |
| // Helper functions from ExtractionOutput | |
/**
 * Builds an export-ready copy of an extraction `fields` object.
 *
 * - A root-level `Fields` entry (when it is a non-empty object) is moved to
 *   the front of the result; an empty or non-object `Fields` is dropped.
 * - When a non-empty `pages` array is present, the top-level `full_text` and
 *   each page's `fields.full_text` are removed.
 * - For the "json" and "xml" formats the `pages` array is replaced by
 *   `page_<n>` entries (`n` is `page.page_number`, or the 1-based index).
 *   Page fields that repeat a top-level value are left out.
 * - `Fields` and `metadata` are removed from every `page_*` object.
 *
 * The input is never mutated: nested page objects are copied before any key
 * is removed from them.
 *
 * @param {*} fields - Extraction fields; non-object values are returned as-is.
 * @param {string} [format="json"] - Target format ("json" and "xml" flatten pages).
 * @returns {*} A new object ready for serialisation, or `fields` itself when
 *   it is not an object.
 */
function prepareFieldsForOutput(fields, format = "json") {
  if (!fields || typeof fields !== "object") {
    return fields;
  }

  // Shallow copy: top-level keys can be added or removed without touching the caller's object.
  const output = { ...fields };
  const rootFields = output.Fields;
  delete output.Fields;

  if (Array.isArray(output.pages) && output.pages.length > 0) {
    delete output.full_text;
    output.pages = output.pages.map((page) => {
      const cleanedPage = { ...page };
      if (cleanedPage.fields && typeof cleanedPage.fields === "object") {
        const cleanedFields = { ...cleanedPage.fields };
        delete cleanedFields.full_text;
        cleanedPage.fields = cleanedFields;
      }
      return cleanedPage;
    });
  }

  if ((format === "json" || format === "xml") && Array.isArray(output.pages)) {
    // Keys already present at the root; identical page-level values are not repeated.
    const topLevelKeys = new Set(
      Object.keys(output).filter((key) => key !== "pages" && key !== "full_text" && key !== "Fields")
    );

    output.pages.forEach((page, idx) => {
      const pageNum = page.page_number || idx + 1;
      const uniqueFields = Object.fromEntries(
        Object.entries(page.fields || {}).filter(
          ([key, value]) => key !== "full_text" && (!topLevelKeys.has(key) || value !== output[key])
        )
      );

      const pageObj = {
        text: page.text || "",
        confidence: page.confidence || 0,
        doc_type: page.doc_type || "other",
      };
      if (Array.isArray(page.table) && page.table.length > 0) {
        pageObj.table = page.table;
      }
      if (Array.isArray(page.footer_notes) && page.footer_notes.length > 0) {
        pageObj.footer_notes = page.footer_notes;
      }
      if (Object.keys(uniqueFields).length > 0) {
        pageObj.fields = uniqueFields;
      }
      output[`page_${pageNum}`] = pageObj;
    });

    delete output.pages;
  }

  // Strip Fields/metadata from page_* objects. Each page object is copied
  // first because `output` only shallow-copies `fields`, so these objects are
  // still shared with the caller.
  for (const key of Object.keys(output)) {
    if (!key.startsWith("page_")) continue;
    const pageData = output[key];
    if (pageData && typeof pageData === "object" && !Array.isArray(pageData)) {
      const pageCopy = { ...pageData };
      delete pageCopy.Fields;
      delete pageCopy.metadata;
      output[key] = pageCopy;
    }
  }

  const hasRootFields =
    rootFields && typeof rootFields === "object" && Object.keys(rootFields).length > 0;
  return hasRootFields ? { Fields: rootFields, ...output } : { ...output };
}
/**
 * Escapes the five characters that have predefined entities in XML so the
 * value can be placed in element content or an attribute value.
 *
 * The ampersand is replaced first so the entities produced by the later
 * replacements are not escaped a second time.
 *
 * @param {*} str - Value to escape; coerced with `String()`.
 * @returns {string} The escaped text.
 */
function escapeXML(str) {
  return String(str)
    .replace(/&/g, "&amp;")
    .replace(/</g, "&lt;")
    .replace(/>/g, "&gt;")
    .replace(/"/g, "&quot;")
    .replace(/'/g, "&apos;");
}
/**
 * Serialises an extraction object to an XML document string.
 *
 * The object is first passed through `prepareFieldsForOutput(obj, "xml")`.
 * Each key becomes an element; arrays repeat the element once per item;
 * `null`/`undefined` values are skipped. Text content is escaped with
 * `escapeXML`.
 *
 * Keys are converted to well-formed element names: any character that is not
 * a letter, digit, `_`, `.` or `-` becomes `_`, and a name that does not start
 * with a letter or `_` is prefixed with `_` (so "Invoice Number" becomes
 * `Invoice_Number` and "2nd" becomes `_2nd`).
 *
 * @param {*} obj - Extraction fields to serialise.
 * @param {string} [rootName="extraction"] - Name of the root element (used as given).
 * @returns {string} The XML document, without a trailing newline.
 */
function objectToXML(obj, rootName = "extraction") {
  const preparedObj = prepareFieldsForOutput(obj, "xml");
  const lines = [`<?xml version="1.0" encoding="UTF-8"?>`, `<${rootName}>`];

  const toTagName = (key) => {
    const cleaned = String(key).trim().replace(/[^\p{L}\p{N}_.-]/gu, "_");
    return /^[\p{L}_]/u.test(cleaned) ? cleaned : `_${cleaned}`;
  };

  const convert = (node, indent) => {
    for (const [key, value] of Object.entries(node)) {
      if (value === null || value === undefined) continue;
      // full_text duplicates the per-page text when pages are present.
      if (key === "full_text" && Array.isArray(node.pages) && node.pages.length > 0) {
        continue;
      }

      const tag = toTagName(key);
      if (Array.isArray(value)) {
        for (const item of value) {
          lines.push(`${indent}<${tag}>`);
          if (item !== null && typeof item === "object") {
            convert(item, `${indent} `);
          } else if (item !== null && item !== undefined) {
            lines.push(`${indent} ${escapeXML(String(item))}`);
          }
          // A null/undefined item yields an empty element so item positions are kept.
          lines.push(`${indent}</${tag}>`);
        }
      } else if (typeof value === "object") {
        lines.push(`${indent}<${tag}>`);
        convert(value, `${indent} `);
        lines.push(`${indent}</${tag}>`);
      } else {
        lines.push(`${indent}<${tag}>${escapeXML(String(value))}</${tag}>`);
      }
    }
  };

  // Non-object input produces an empty root element instead of throwing.
  if (preparedObj !== null && typeof preparedObj === "object") {
    convert(preparedObj, " ");
  }

  lines.push(`</${rootName}>`);
  return lines.join("\n");
}
| export default function ExportButtons({ isComplete, extractionResult }) { | |
// Format currently being exported ("docx" | "json" | "xml"), or null when idle.
const [downloading, setDownloading] = useState(null);
// NOTE(review): `copied` / `setCopied` are not referenced anywhere in the visible component.
const [copied, setCopied] = useState(false);
// Visibility of the e-mail share dialog.
const [isShareModalOpen, setIsShareModalOpen] = useState(false);
// Visibility of the share-link dialog, the generated link, and its loading flag.
const [isShareLinkModalOpen, setIsShareLinkModalOpen] = useState(false);
const [shareLink, setShareLink] = useState("");
const [isGeneratingLink, setIsGeneratingLink] = useState(false);
/**
 * Collects the plain text of an extraction.
 *
 * When `fields` has `page_*` entries, the non-empty `text` of each one is
 * joined with a blank line, ordered by the numeric suffix of the key (so
 * `page_2` comes before `page_10` regardless of key insertion order). Keys
 * without a numeric suffix keep their relative order and are placed last.
 * If no page yields text, `fields.full_text` is used.
 *
 * @param {*} fields - Extraction fields object.
 * @returns {string} The combined text, or "" when none is available.
 */
const extractTextFromFields = (fields) => {
  if (!fields || typeof fields !== "object") {
    return "";
  }

  const PAGE_PREFIX = "page_";
  const pageNumber = (key) => {
    const parsed = Number.parseInt(key.slice(PAGE_PREFIX.length), 10);
    return Number.isNaN(parsed) ? Number.POSITIVE_INFINITY : parsed;
  };
  const byPageNumber = (a, b) => {
    const left = pageNumber(a);
    const right = pageNumber(b);
    if (left === right) return 0; // stable sort keeps the original order
    return left < right ? -1 : 1;
  };

  const pageTexts = Object.keys(fields)
    .filter((key) => key.startsWith(PAGE_PREFIX))
    .sort(byPageNumber)
    .map((key) => fields[key]?.text)
    .filter(Boolean);

  if (pageTexts.length > 0) {
    return pageTexts.join("\n\n");
  }
  return fields.full_text || "";
};
/**
 * Escapes text for use as HTML element content.
 *
 * Replaces `&`, `<`, `>` and the no-break space, which is the same set the
 * previous `textContent` -> `innerHTML` round trip produced, but without
 * creating a DOM element per call or requiring `document`.
 * Quotes are left alone, so the result is not meant for attribute values.
 *
 * @param {*} text - Value to escape; falsy values give "".
 * @returns {string} The escaped text.
 */
const escapeHtml = (text) => {
  if (!text) return "";
  return String(text)
    .replace(/&/g, "&amp;")
    .replace(/</g, "&lt;")
    .replace(/>/g, "&gt;")
    .replace(/\u00a0/g, "&nbsp;");
};
/**
 * Converts pipe-separated table blocks inside `text` into HTML tables.
 *
 * A table starts on a line that splits into at least three parts on `|`, is
 * not a separator line, and has at least two header cells once the empty
 * leading/trailing cells are dropped. One optional separator line (only `|`,
 * `-`, `:` and whitespace) directly after the header is skipped. Following
 * lines are taken as data rows until a blank line or a line without `|`;
 * separator lines between rows are ignored. All other lines are passed
 * through unchanged. Cell text is escaped with `escapeHtml`.
 *
 * Rows with fewer cells than the header are padded with empty cells so every
 * row spans the full table width; extra cells are kept.
 *
 * @param {string} text - Source text, possibly containing pipe tables.
 * @returns {string} The text with table blocks replaced by table markup
 *   (falsy input is returned as-is).
 */
const convertPipeTablesToHTML = (text) => {
  if (!text) return text;

  const SEPARATOR_ROW = /^[\s|\-:]+$/;
  const isSeparatorRow = (line) => SEPARATOR_ROW.test(line.trim());

  // Split on pipes, trim, and drop the empty cell a leading/trailing pipe creates.
  const splitCells = (line) => {
    const cells = line.split("|").map((cell) => cell.trim());
    if (cells.length > 0 && !cells[0]) cells.shift();
    if (cells.length > 0 && !cells[cells.length - 1]) cells.pop();
    return cells;
  };

  const renderTable = (headerCells, dataRows) => {
    const headerHtml = headerCells
      .map(
        (cell) =>
          `<th class="border border-gray-300 px-4 py-2 bg-gray-100 font-semibold text-left">${escapeHtml(cell)}</th>`
      )
      .join("");

    const bodyHtml = dataRows
      .map((row) => {
        const missing = Math.max(0, headerCells.length - row.length);
        const cells = [...row, ...Array(missing).fill("")];
        const cellsHtml = cells
          .map((cell) => `<td class="border border-gray-300 px-4 py-2">${escapeHtml(cell)}</td>`)
          .join("");
        return `<tr>${cellsHtml}</tr>\n`;
      })
      .join("");

    return (
      '<table class="border-collapse border border-gray-300 w-full my-4">\n<thead>\n<tr>' +
      headerHtml +
      "</tr>\n</thead>\n<tbody>\n" +
      bodyHtml +
      "</tbody>\n</table>"
    );
  };

  const lines = text.split("\n");
  const result = [];
  let i = 0;

  while (i < lines.length) {
    const line = lines[i];
    const isHeaderCandidate = line.split("|").length >= 3 && !isSeparatorRow(line);
    const headerCells = isHeaderCandidate ? splitCells(line) : [];

    if (headerCells.length < 2) {
      // Not the start of a table: keep the line untouched.
      result.push(line);
      i++;
      continue;
    }

    let j = i + 1;
    if (j < lines.length && isSeparatorRow(lines[j])) {
      j++;
    }

    const dataRows = [];
    while (j < lines.length) {
      const rowLine = lines[j];
      if (!rowLine.trim() || !rowLine.includes("|")) break;
      if (!isSeparatorRow(rowLine)) {
        dataRows.push(splitCells(rowLine));
      }
      j++;
    }

    result.push(renderTable(headerCells, dataRows));
    i = j;
  }

  return result.join("\n");
};
/**
 * Renders a small Markdown subset to an HTML string.
 *
 * Steps, in order: pipe tables -> table markup (`convertPipeTablesToHTML`),
 * `$^{..}$` / `$_{..}$` -> `<sup>` / `<sub>`, `#`..`###` headings, `**bold**`,
 * `*italic*`, `[label](url)` links, then blank lines -> paragraph breaks and
 * single newlines -> `<br>`. Table blocks are swapped for placeholders
 * while the Markdown rules run so their markup is not altered.
 *
 * Link targets are hardened: `"` in the URL is escaped so it cannot close the
 * `href` attribute, and `javascript:`, `vbscript:` and `data:` targets are
 * rendered as the plain label instead of a link.
 *
 * NOTE(review): all other text is still passed through unescaped, so raw HTML
 * in the input reaches the output — confirm the input source is trusted.
 *
 * @param {string} text - Markdown-like source text.
 * @returns {string} HTML string ("" for falsy input).
 */
const renderMarkdownToHTML = (text) => {
  if (!text) return "";

  let html = convertPipeTablesToHTML(text);

  // LaTeX-style superscripts/subscripts, braced form before the bare form.
  html = html
    .replace(/\$\s*\^\s*\{([^}]+)\}\s*\$/g, "<sup>$1</sup>")
    .replace(/\$\s*\^\s*([^\s$<>]+)\s*\$/g, "<sup>$1</sup>")
    .replace(/\$\s*_\s*\{([^}]+)\}\s*\$/g, "<sub>$1</sub>")
    .replace(/\$\s*_\s*([^\s$<>]+)\s*\$/g, "<sub>$1</sub>");

  // Park table blocks behind placeholders so the rules below skip them.
  const htmlBlocks = [];
  html = html.replace(/<table[\s\S]*?<\/table>/gi, (match) => {
    htmlBlocks.push(match);
    return `__HTML_BLOCK_${htmlBlocks.length - 1}__`;
  });

  // Headings, longest marker first so "###" is not consumed by "#".
  html = html
    .replace(/^### (.*$)/gim, "<h3>$1</h3>")
    .replace(/^## (.*$)/gim, "<h2>$1</h2>")
    .replace(/^# (.*$)/gim, "<h1>$1</h1>");

  // Bold before italic so "**" is not read as two italic markers.
  html = html
    .replace(/\*\*(.*?)\*\*/g, "<strong>$1</strong>")
    .replace(/\*(.*?)\*/g, "<em>$1</em>");

  html = html.replace(/\[([^\]]+)\]\(([^)]+)\)/g, (match, label, url) => {
    // Browsers ignore control characters and whitespace when resolving the scheme.
    const scheme = url.replace(/[\u0000-\u0020]/g, "");
    if (/^(javascript|vbscript|data):/i.test(scheme)) {
      return label;
    }
    return `<a href="${url.replace(/"/g, "&quot;")}">${label}</a>`;
  });

  const rendered = html
    .split(/(__HTML_BLOCK_\d+__)/)
    .map((part) => {
      const placeholder = /^__HTML_BLOCK_(\d+)__$/.exec(part);
      if (placeholder) {
        return htmlBlocks[Number.parseInt(placeholder[1], 10)];
      }

      let processed = part.replace(/\n\n+/g, "</p><p>");
      processed = processed.replace(/([^\n>])\n([^\n<])/g, "$1<br>$2");
      // Wrap only text that does not already begin with a tag.
      if (processed.trim() && !processed.trim().startsWith("<")) {
        processed = `<p>${processed}</p>`;
      }
      return processed;
    })
    .join("");

  // Drop the empty paragraphs the splitting above can leave behind.
  return rendered
    .replace(/<p><\/p>/g, "")
    .replace(/<p>\s*<br>\s*<\/p>/g, "")
    .replace(/<p>\s*<\/p>/g, "");
};
/**
 * Wraps rendered HTML in a Word-compatible HTML document (served with the
 * `application/msword` MIME type and a `.doc` extension by the caller).
 *
 * @param {string} bodyHtml - Markup placed inside `<body>`.
 * @returns {string} The complete HTML document.
 */
const buildWordDocument = (bodyHtml) => `<!DOCTYPE html>
<html xmlns:o="urn:schemas-microsoft-com:office:office" xmlns:w="urn:schemas-microsoft-com:office:word" xmlns="http://www.w3.org/TR/REC-html40">
<head>
<meta charset="UTF-8">
<meta name="ProgId" content="Word.Document">
<meta name="Generator" content="Microsoft Word">
<meta name="Originator" content="Microsoft Word">
<!--[if gte mso 9]><xml>
<w:WordDocument>
<w:View>Print</w:View>
<w:Zoom>100</w:Zoom>
<w:DoNotOptimizeForBrowser/>
</w:WordDocument>
</xml><![endif]-->
<title>Document Extraction</title>
<style>
@page {
size: 8.5in 11in;
margin: 1in;
}
body {
font-family: 'Calibri', 'Arial', sans-serif;
font-size: 11pt;
line-height: 1.6;
margin: 0;
color: #333;
}
h1 {
font-size: 18pt;
font-weight: bold;
color: #0f172a;
margin-top: 24pt;
margin-bottom: 12pt;
page-break-after: avoid;
}
h2 {
font-size: 16pt;
font-weight: 600;
color: #0f172a;
margin-top: 20pt;
margin-bottom: 10pt;
page-break-after: avoid;
}
h3 {
font-size: 14pt;
font-weight: 600;
color: #1e293b;
margin-top: 16pt;
margin-bottom: 8pt;
page-break-after: avoid;
}
p {
margin-top: 6pt;
margin-bottom: 6pt;
}
table {
width: 100%;
border-collapse: collapse;
margin: 12pt 0;
font-size: 10pt;
page-break-inside: avoid;
}
table th {
background-color: #f8fafc;
border: 1pt solid #cbd5e1;
padding: 6pt;
text-align: left;
font-weight: 600;
color: #0f172a;
}
table td {
border: 1pt solid #cbd5e1;
padding: 6pt;
color: #334155;
}
table tr:nth-child(even) {
background-color: #f8fafc;
}
sup {
font-size: 0.75em;
vertical-align: super;
line-height: 0;
}
sub {
font-size: 0.75em;
vertical-align: sub;
line-height: 0;
}
strong {
font-weight: 600;
}
em {
font-style: italic;
}
a {
color: #4f46e5;
text-decoration: underline;
}
</style>
</head>
<body>
${bodyHtml}
</body>
</html>`;

/**
 * Exports the current extraction as a file download.
 *
 * Supported formats: "json" (prepared fields, pretty-printed), "xml"
 * (`objectToXML`) and "docx" (extracted text rendered to HTML and wrapped in
 * a Word-compatible document, saved as `.doc`). Any other format is logged
 * and ignored instead of downloading an empty, unnamed file.
 *
 * `downloading` is set to the format while the export runs and is always
 * reset to `null` afterwards, including on errors and unsupported formats.
 *
 * @param {string} format - "json", "xml" or "docx".
 * @returns {Promise<void>} Resolves once the download has been triggered;
 *   errors are logged, not thrown.
 */
const handleDownload = async (format) => {
  if (!extractionResult || !extractionResult.fields) {
    console.error("No extraction data available");
    return;
  }

  setDownloading(format);
  try {
    const { fields } = extractionResult;
    let content;
    let extension;
    let mimeType;

    switch (format) {
      case "json":
        content = JSON.stringify(prepareFieldsForOutput(fields, "json"), null, 2);
        extension = "json";
        mimeType = "application/json";
        break;
      case "xml":
        content = objectToXML(fields);
        extension = "xml";
        mimeType = "application/xml";
        break;
      case "docx":
        content = buildWordDocument(renderMarkdownToHTML(extractTextFromFields(fields)));
        // The payload is HTML, which Word opens under the legacy .doc type.
        extension = "doc";
        mimeType = "application/msword";
        break;
      default:
        console.error(`Unsupported export format: ${format}`);
        return;
    }

    const dateStamp = new Date().toISOString().split("T")[0];
    const filename = `extraction_${dateStamp}.${extension}`;

    const blob = new Blob([content], { type: mimeType });
    const url = URL.createObjectURL(blob);
    try {
      // A temporary anchor is the portable way to trigger a named download.
      const link = document.createElement("a");
      link.href = url;
      link.download = filename;
      document.body.appendChild(link);
      link.click();
      document.body.removeChild(link);
    } finally {
      URL.revokeObjectURL(url);
    }
  } catch (error) {
    console.error("Download error:", error);
  } finally {
    setDownloading(null);
  }
};
/**
 * Requests a share link for the current extraction and shows it in the
 * share-link dialog.
 *
 * The dialog is opened immediately with an empty link and the loading flag
 * set; the link is filled in when `createShareLink` reports success. On any
 * failure the error is logged and the link stays empty, so the dialog
 * remains open without a link. Does nothing when the extraction has no id.
 *
 * @returns {Promise<void>}
 */
const handleCopyLink = async () => {
  if (!extractionResult?.id) return;

  setIsGeneratingLink(true);
  setIsShareLinkModalOpen(true);
  setShareLink("");

  try {
    const result = await createShareLink(extractionResult.id);
    if (!result.success || !result.share_link) {
      // Routed through the catch below so all failures are logged the same way.
      throw new Error("Failed to generate share link");
    }
    setShareLink(result.share_link);
  } catch (err) {
    console.error("Failed to create share link:", err);
    setShareLink("");
  } finally {
    setIsGeneratingLink(false);
  }
};
/**
 * Shares an extraction with a recipient by e-mail via `shareExtraction`.
 * Rejections are not caught here and propagate to the caller.
 *
 * @param {*} extractionId - Id of the extraction to share.
 * @param {string} recipientEmail - Address to share with.
 * @returns {Promise<void>}
 */
const handleShare = async (extractionId, recipientEmail) => {
  await shareExtraction(extractionId, recipientEmail);
};
| if (!isComplete) return null; | |
| return ( | |
| <motion.div | |
| initial={{ opacity: 0, y: 20 }} | |
| animate={{ opacity: 1, y: 0 }} | |
| className="flex items-center gap-3" | |
| > | |
| {/* Export Options Dropdown */} | |
| <DropdownMenu> | |
| <DropdownMenuTrigger asChild> | |
| <Button | |
| variant="ghost" | |
| className="h-11 w-11 rounded-xl hover:bg-slate-100" | |
| disabled={downloading !== null} | |
| > | |
| {downloading ? ( | |
| <motion.div | |
| animate={{ rotate: 360 }} | |
| transition={{ duration: 1, repeat: Infinity, ease: "linear" }} | |
| > | |
| <Download className="h-4 w-4" /> | |
| </motion.div> | |
| ) : ( | |
| <Share2 className="h-4 w-4" /> | |
| )} | |
| </Button> | |
| </DropdownMenuTrigger> | |
| <DropdownMenuContent align="end" className="w-56 rounded-xl p-2"> | |
| <DropdownMenuItem | |
| className="rounded-lg cursor-pointer" | |
| onClick={() => setIsShareModalOpen(true)} | |
| > | |
| <Mail className="h-4 w-4 mr-2 text-indigo-600" /> | |
| Share output | |
| </DropdownMenuItem> | |
| <DropdownMenuItem | |
| className="rounded-lg cursor-pointer" | |
| onClick={handleCopyLink} | |
| > | |
| <Link2 className="h-4 w-4 mr-2 text-indigo-600" /> | |
| Copy share link | |
| </DropdownMenuItem> | |
| <DropdownMenuSeparator /> | |
| <DropdownMenuItem | |
| className="rounded-lg cursor-pointer" | |
| onClick={() => handleDownload("docx")} | |
| disabled={downloading === "docx"} | |
| > | |
| {downloading === "docx" ? ( | |
| <motion.div | |
| animate={{ rotate: 360 }} | |
| transition={{ duration: 1, repeat: Infinity, ease: "linear" }} | |
| className="h-4 w-4 mr-2" | |
| > | |
| <Download className="h-4 w-4" /> | |
| </motion.div> | |
| ) : ( | |
| <FileText className="h-4 w-4 mr-2 text-blue-600" /> | |
| )} | |
| Download Docx | |
| </DropdownMenuItem> | |
| <DropdownMenuItem | |
| className="rounded-lg cursor-pointer" | |
| onClick={() => handleDownload("json")} | |
| disabled={downloading === "json"} | |
| > | |
| {downloading === "json" ? ( | |
| <motion.div | |
| animate={{ rotate: 360 }} | |
| transition={{ duration: 1, repeat: Infinity, ease: "linear" }} | |
| className="h-4 w-4 mr-2" | |
| > | |
| <Download className="h-4 w-4" /> | |
| </motion.div> | |
| ) : ( | |
| <Braces className="h-4 w-4 mr-2 text-indigo-600" /> | |
| )} | |
| Download JSON | |
| </DropdownMenuItem> | |
| <DropdownMenuItem | |
| className="rounded-lg cursor-pointer" | |
| onClick={() => handleDownload("xml")} | |
| disabled={downloading === "xml"} | |
| > | |
| {downloading === "xml" ? ( | |
| <motion.div | |
| animate={{ rotate: 360 }} | |
| transition={{ duration: 1, repeat: Infinity, ease: "linear" }} | |
| className="h-4 w-4 mr-2" | |
| > | |
| <Download className="h-4 w-4" /> | |
| </motion.div> | |
| ) : ( | |
| <FileCode2 className="h-4 w-4 mr-2 text-slate-600" /> | |
| )} | |
| Download XML | |
| </DropdownMenuItem> | |
| </DropdownMenuContent> | |
| </DropdownMenu> | |
| {/* Share Modal */} | |
| <ShareModal | |
| isOpen={isShareModalOpen} | |
| onClose={() => setIsShareModalOpen(false)} | |
| onShare={handleShare} | |
| extractionId={extractionResult?.id} | |
| /> | |
| {/* Share Link Modal */} | |
| <ShareLinkModal | |
| isOpen={isShareLinkModalOpen} | |
| onClose={() => { | |
| setIsShareLinkModalOpen(false); | |
| setShareLink(""); | |
| }} | |
| shareLink={shareLink} | |
| isLoading={isGeneratingLink} | |
| /> | |
| </motion.div> | |
| ); | |
| } | |