import React, { useState } from "react"; import { motion, AnimatePresence } from "framer-motion"; import { Download, Braces, FileCode2, Check, Share2, FileText, Link2, Mail, } from "lucide-react"; import { Button } from "@/components/ui/button"; import { DropdownMenu, DropdownMenuContent, DropdownMenuItem, DropdownMenuSeparator, DropdownMenuTrigger, } from "@/components/ui/dropdown-menu"; import { cn } from "@/lib/utils"; import ShareModal from "@/components/ShareModal"; import ShareLinkModal from "@/components/ShareLinkModal"; import { shareExtraction, createShareLink } from "@/services/api"; // Helper functions from ExtractionOutput function prepareFieldsForOutput(fields, format = "json") { if (!fields || typeof fields !== "object") { return fields; } const output = { ...fields }; // Extract Fields from root level if it exists const rootFields = output.Fields; // Remove Fields from output temporarily (will be added back at top) delete output.Fields; // Remove full_text from top-level if pages array exists (to avoid duplication) if (output.pages && Array.isArray(output.pages) && output.pages.length > 0) { delete output.full_text; // Clean up each page: remove full_text from page.fields (it duplicates page.text) output.pages = output.pages.map(page => { const cleanedPage = { ...page }; if (cleanedPage.fields && typeof cleanedPage.fields === "object") { const cleanedFields = { ...cleanedPage.fields }; // Remove full_text from page fields (duplicates page.text) delete cleanedFields.full_text; cleanedPage.fields = cleanedFields; } return cleanedPage; }); } // For JSON and XML: restructure pages into separate top-level fields (page_1, page_2, etc.) 
if ((format === "json" || format === "xml") && output.pages && Array.isArray(output.pages)) { // Get top-level field keys (these are merged from all pages - avoid duplicating in page fields) const topLevelKeys = new Set(Object.keys(output).filter(k => k !== "pages" && k !== "full_text" && k !== "Fields")); output.pages.forEach((page, idx) => { const pageNum = page.page_number || idx + 1; const pageFields = page.fields || {}; // Remove duplicate fields from page.fields: // 1. Remove full_text (duplicates page.text) // 2. Remove fields that match top-level fields (already shown at root) const cleanedPageFields = {}; for (const [key, value] of Object.entries(pageFields)) { // Skip full_text and fields that match top-level exactly if (key !== "full_text" && (!topLevelKeys.has(key) || (value !== output[key]))) { cleanedPageFields[key] = value; } } const pageObj = { text: page.text || "", confidence: page.confidence || 0, doc_type: page.doc_type || "other" }; // Add table and footer_notes if they exist if (page.table && Array.isArray(page.table) && page.table.length > 0) { pageObj.table = page.table; } if (page.footer_notes && Array.isArray(page.footer_notes) && page.footer_notes.length > 0) { pageObj.footer_notes = page.footer_notes; } // Only add fields if there are unique page-specific fields if (Object.keys(cleanedPageFields).length > 0) { pageObj.fields = cleanedPageFields; } output[`page_${pageNum}`] = pageObj; }); // Remove pages array - we now have page_1, page_2, etc. 
as separate fields delete output.pages; } // Handle page_X structure (from backend) - remove Fields from page objects if they exist if (output && typeof output === "object") { const pageKeys = Object.keys(output).filter(k => k.startsWith("page_")); for (const pageKey of pageKeys) { const pageData = output[pageKey]; if (pageData && typeof pageData === "object") { // Remove Fields from page objects (it's now at root level) delete pageData.Fields; delete pageData.metadata; } } } // Rebuild output with Fields at the top (only if it exists and is not empty) const finalOutput = {}; if (rootFields && typeof rootFields === "object" && Object.keys(rootFields).length > 0) { finalOutput.Fields = rootFields; } // Add all other keys Object.keys(output).forEach(key => { finalOutput[key] = output[key]; }); return finalOutput; } function escapeXML(str) { return str .replace(/&/g, "&") .replace(//g, ">") .replace(/"/g, """) .replace(/'/g, "'"); } function objectToXML(obj, rootName = "extraction") { // Prepare fields - remove full_text if pages exist const preparedObj = prepareFieldsForOutput(obj, "xml"); let xml = `\n<${rootName}>\n`; const convert = (obj, indent = " ") => { for (const [key, value] of Object.entries(obj)) { if (value === null || value === undefined) continue; // Skip full_text if pages exist (already handled in prepareFieldsForOutput) if (key === "full_text" && obj.pages && Array.isArray(obj.pages) && obj.pages.length > 0) { continue; } if (Array.isArray(value)) { value.forEach((item) => { xml += `${indent}<${key}>\n`; if (typeof item === "object") { convert(item, indent + " "); } else { xml += `${indent} ${escapeXML(String(item))}\n`; } xml += `${indent}\n`; }); } else if (typeof value === "object") { xml += `${indent}<${key}>\n`; convert(value, indent + " "); xml += `${indent}\n`; } else { xml += `${indent}<${key}>${escapeXML(String(value))}\n`; } } }; convert(preparedObj); xml += ``; return xml; } export default function ExportButtons({ isComplete, 
extractionResult }) {
/*
 * ExportButtons: export and share actions for a finished extraction.
 *
 * Props: isComplete (the component returns null while it is falsy) and
 * extractionResult (its `fields` are exported, its `id` is used for sharing).
 *
 * State: `downloading` holds the format currently being generated or null;
 * the remaining state tracks the two share modals, the generated share link
 * and whether that link is still being created.
 *
 * handleDownload(format) builds the file in the browser and saves it through
 * a Blob and a temporary anchor element: "json" uses prepareFieldsForOutput
 * plus JSON.stringify, "xml" uses objectToXML, and "docx" renders the page
 * text to HTML and saves it with a .doc extension and the application/msword
 * MIME type. handleCopyLink requests a link through createShareLink and
 * stores result.share_link; handleShare delegates to shareExtraction.
 *
 * NOTE(review): in this copy of the file the markup inside several string
 * literals, some regular-expression bodies and the JSX in the return
 * statement appear to have been stripped (empty replacement strings, an
 * empty regex literal, tag-less JSX). Restore those parts from version
 * control before changing this function.
 */
const [downloading, setDownloading] = useState(null); const [copied, setCopied] = useState(false); const [isShareModalOpen, setIsShareModalOpen] = useState(false); const [isShareLinkModalOpen, setIsShareLinkModalOpen] = useState(false); const [shareLink, setShareLink] = useState(""); const [isGeneratingLink, setIsGeneratingLink] = useState(false); // Helper function to extract text from fields (same as in ExtractionOutput) const extractTextFromFields = (fields) => { if (!fields || typeof fields !== "object") { return ""; } // Check for page_X structure first (preferred format) const pageKeys = Object.keys(fields).filter(key => key.startsWith("page_")); if (pageKeys.length > 0) { // Get text from first page (or combine all pages) const pageTexts = pageKeys.map(key => { const page = fields[key]; if (page && page.text) { return page.text; } return ""; }).filter(text => text); if (pageTexts.length > 0) { return pageTexts.join("\n\n"); } } // Fallback to full_text if (fields.full_text) { return fields.full_text; } return ""; }; // Helper function to escape HTML const escapeHtml = (text) => { if (!text) return ''; const div = document.createElement('div'); div.textContent = text; return div.innerHTML; }; // Helper function to convert pipe-separated tables to HTML tables const convertPipeTablesToHTML = (text) => { if (!text) return text; const lines = text.split('\n'); const result = []; let i = 0; while (i < lines.length) { const line = lines[i]; // Check if this line looks like a table row (has multiple pipes) if (line.includes('|') && line.split('|').length >= 3) { // Check if it's a separator line (only |, -, :, spaces) const isSeparator = /^[\s|\-:]+$/.test(line.trim()); if (!isSeparator) { // Start of a table - collect all table rows const tableRows = []; let j = i; // Collect header row const headerLine = lines[j]; const headerCells = headerLine.split('|').map(cell => cell.trim()).filter(cell => cell || cell === ''); // Remove empty cells at 
start/end if (headerCells.length > 0 && !headerCells[0]) headerCells.shift(); if (headerCells.length > 0 && !headerCells[headerCells.length - 1]) headerCells.pop(); if (headerCells.length >= 2) { tableRows.push(headerCells); j++; // Skip separator line if present if (j < lines.length && /^[\s|\-:]+$/.test(lines[j].trim())) { j++; } // Collect data rows while (j < lines.length) { const rowLine = lines[j]; if (!rowLine.trim()) break; // Empty line ends table // Check if it's still a table row if (rowLine.includes('|') && rowLine.split('|').length >= 2) { const isRowSeparator = /^[\s|\-:]+$/.test(rowLine.trim()); if (!isRowSeparator) { const rowCells = rowLine.split('|').map(cell => cell.trim()); // Remove empty cells at start/end if (rowCells.length > 0 && !rowCells[0]) rowCells.shift(); if (rowCells.length > 0 && !rowCells[rowCells.length - 1]) rowCells.pop(); tableRows.push(rowCells); j++; } else { j++; } } else { break; // Not a table row anymore } } // Convert to HTML table if (tableRows.length > 0) { let htmlTable = '\n\n'; // Header row tableRows[0].forEach(cell => { htmlTable += ``; }); htmlTable += '\n\n\n'; // Data rows for (let rowIdx = 1; rowIdx < tableRows.length; rowIdx++) { htmlTable += ''; tableRows[rowIdx].forEach((cell, colIdx) => { // Use header cell count to ensure alignment const cellContent = cell || ''; htmlTable += ``; }); htmlTable += '\n'; } htmlTable += '\n
${escapeHtml(cell)}
${escapeHtml(cellContent)}
'; result.push(htmlTable); i = j; continue; } } } } // Not a table row, add as-is result.push(line); i++; } return result.join('\n'); }; // Helper function to render markdown to HTML (same as in ExtractionOutput) const renderMarkdownToHTML = (text) => { if (!text) return ""; let html = text; // FIRST: Convert pipe-separated tables to HTML tables html = convertPipeTablesToHTML(html); // Convert LaTeX-style superscripts/subscripts FIRST html = html.replace(/\$\s*\^\s*\{([^}]+)\}\s*\$/g, '$1'); html = html.replace(/\$\s*\^\s*([^\s$<>]+)\s*\$/g, '$1'); html = html.replace(/\$\s*_\s*\{([^}]+)\}\s*\$/g, '$1'); html = html.replace(/\$\s*_\s*([^\s$<>]+)\s*\$/g, '$1'); // Protect HTML table blocks const htmlBlocks = []; let htmlBlockIndex = 0; html = html.replace(//gi, (match) => { const placeholder = `__HTML_BLOCK_${htmlBlockIndex}__`; htmlBlocks[htmlBlockIndex] = match; htmlBlockIndex++; return placeholder; }); // Convert markdown headers html = html.replace(/^### (.*$)/gim, '

$1

'); html = html.replace(/^## (.*$)/gim, '

$1

'); html = html.replace(/^# (.*$)/gim, '

$1

'); // Convert markdown bold/italic html = html.replace(/\*\*(.*?)\*\*/g, '$1'); html = html.replace(/\*(.*?)\*/g, '$1'); // Convert markdown links html = html.replace(/\[([^\]]+)\]\(([^)]+)\)/g, '$1'); // Process line breaks const parts = html.split(/(__HTML_BLOCK_\d+__)/); const processedParts = parts.map((part) => { if (part.match(/^__HTML_BLOCK_\d+__$/)) { const blockIndex = parseInt(part.match(/\d+/)[0]); return htmlBlocks[blockIndex]; } else { let processed = part; processed = processed.replace(/\n\n+/g, '

'); processed = processed.replace(/([^\n>])\n([^\n<])/g, '$1
$2'); if (processed.trim() && !processed.trim().startsWith('<')) { processed = '

' + processed + '

'; } return processed; } }); html = processedParts.join(''); html = html.replace(/

<\/p>/g, ''); html = html.replace(/

\s*
\s*<\/p>/g, ''); html = html.replace(/

\s*<\/p>/g, ''); return html; }; const handleDownload = async (format) => { if (!extractionResult || !extractionResult.fields) { console.error("No extraction data available"); return; } setDownloading(format); try { const fields = extractionResult.fields; let content = ""; let filename = ""; let mimeType = ""; if (format === "json") { const preparedFields = prepareFieldsForOutput(fields, "json"); content = JSON.stringify(preparedFields, null, 2); filename = `extraction_${new Date().toISOString().split('T')[0]}.json`; mimeType = "application/json"; } else if (format === "xml") { content = objectToXML(fields); filename = `extraction_${new Date().toISOString().split('T')[0]}.xml`; mimeType = "application/xml"; } else if (format === "docx") { // For DOCX, create a Word-compatible HTML document that preserves layout // Extract text and convert to HTML (same as text viewer) const textContent = extractTextFromFields(fields); const htmlContent = renderMarkdownToHTML(textContent); // Create a Word-compatible HTML document with proper MIME type // Word can open HTML files with .docx extension if we use the right MIME type const wordHTML = ` Document Extraction ${htmlContent} `; content = wordHTML; filename = `extraction_${new Date().toISOString().split('T')[0]}.doc`; mimeType = "application/msword"; } // Create blob and download const blob = new Blob([content], { type: mimeType }); const url = URL.createObjectURL(blob); const link = document.createElement("a"); link.href = url; link.download = filename; document.body.appendChild(link); link.click(); document.body.removeChild(link); URL.revokeObjectURL(url); setDownloading(null); } catch (error) { console.error("Download error:", error); setDownloading(null); } }; const handleCopyLink = async () => { if (!extractionResult?.id) return; setIsGeneratingLink(true); setIsShareLinkModalOpen(true); setShareLink(""); try { const result = await createShareLink(extractionResult.id); if (result.success && result.share_link) { 
setShareLink(result.share_link); } else { throw new Error("Failed to generate share link"); } } catch (err) { console.error("Failed to create share link:", err); setShareLink(""); // Still show modal but with error state } finally { setIsGeneratingLink(false); } }; const handleShare = async (extractionId, recipientEmail) => { await shareExtraction(extractionId, recipientEmail); }; if (!isComplete) return null; return ( {/* Export Options Dropdown */} setIsShareModalOpen(true)} > Share output Copy share link handleDownload("docx")} disabled={downloading === "docx"} > {downloading === "docx" ? ( ) : ( )} Download Docx handleDownload("json")} disabled={downloading === "json"} > {downloading === "json" ? ( ) : ( )} Download JSON handleDownload("xml")} disabled={downloading === "xml"} > {downloading === "xml" ? ( ) : ( )} Download XML {/* Share Modal */} setIsShareModalOpen(false)} onShare={handleShare} extractionId={extractionResult?.id} /> {/* Share Link Modal */} { setIsShareLinkModalOpen(false); setShareLink(""); }} shareLink={shareLink} isLoading={isGeneratingLink} /> ); }