EZOFISOCR / frontend /src /components /ExportButtons.jsx
Seth
update
89a3828
import React, { useState } from "react";
import { motion, AnimatePresence } from "framer-motion";
import {
Download,
Braces,
FileCode2,
Check,
Share2,
FileText,
Link2,
Mail,
} from "lucide-react";
import { Button } from "@/components/ui/button";
import {
DropdownMenu,
DropdownMenuContent,
DropdownMenuItem,
DropdownMenuSeparator,
DropdownMenuTrigger,
} from "@/components/ui/dropdown-menu";
import { cn } from "@/lib/utils";
import ShareModal from "@/components/ShareModal";
import ShareLinkModal from "@/components/ShareLinkModal";
import { shareExtraction, createShareLink } from "@/services/api";
// Helper functions from ExtractionOutput
/**
 * Build an export-ready copy of an extraction `fields` object.
 *
 * The input is never modified: the top level, every entry of `pages`, each
 * page's `fields`, and every `page_X` object are copied before anything is
 * removed from them.
 *
 * Transformations applied to the copy:
 *  - a root-level `Fields` entry is moved to the front of the result (and
 *    dropped entirely when it is not a non-empty object);
 *  - when a non-empty `pages` array exists, the top-level `full_text` and each
 *    page's `fields.full_text` are removed;
 *  - for the "json" and "xml" formats a `pages` array is replaced by
 *    `page_<n>` entries holding `text`, `confidence`, `doc_type`, optional
 *    non-empty `table` / `footer_notes` arrays, and only those page fields
 *    that do not repeat an identical top-level value;
 *  - `Fields` and `metadata` are stripped from every `page_*` object.
 *
 * @param {*} fields - Extraction fields; non-object values are returned as-is.
 * @param {string} [format="json"] - Target format; only "json" and "xml"
 *   trigger the `pages` -> `page_<n>` restructuring.
 * @returns {*} A new object for object input, otherwise `fields` unchanged.
 */
function prepareFieldsForOutput(fields, format = "json") {
  if (!fields || typeof fields !== "object") {
    return fields;
  }

  const output = { ...fields };

  // Pull Fields out now; it is re-inserted first when the result is rebuilt.
  const rootFields = output.Fields;
  delete output.Fields;

  // With per-page text available, full_text is redundant at both levels.
  if (output.pages && Array.isArray(output.pages) && output.pages.length > 0) {
    delete output.full_text;
    output.pages = output.pages.map((page) => {
      const cleanedPage = { ...page };
      if (cleanedPage.fields && typeof cleanedPage.fields === "object") {
        const cleanedFields = { ...cleanedPage.fields };
        delete cleanedFields.full_text;
        cleanedPage.fields = cleanedFields;
      }
      return cleanedPage;
    });
  }

  // JSON / XML: flatten the pages array into page_1, page_2, ... entries.
  if ((format === "json" || format === "xml") && output.pages && Array.isArray(output.pages)) {
    // Keys already present at the root; identical page-level values are dropped.
    const topLevelKeys = new Set(
      Object.keys(output).filter((k) => k !== "pages" && k !== "full_text" && k !== "Fields"),
    );

    output.pages.forEach((page, idx) => {
      const pageNum = page.page_number || idx + 1;
      const pageFields = page.fields || {};

      const cleanedPageFields = {};
      for (const [key, value] of Object.entries(pageFields)) {
        // Keep a field unless it is full_text or an exact repeat of the root value.
        if (key !== "full_text" && (!topLevelKeys.has(key) || value !== output[key])) {
          cleanedPageFields[key] = value;
        }
      }

      const pageObj = {
        text: page.text || "",
        confidence: page.confidence || 0,
        doc_type: page.doc_type || "other",
      };
      if (page.table && Array.isArray(page.table) && page.table.length > 0) {
        pageObj.table = page.table;
      }
      if (page.footer_notes && Array.isArray(page.footer_notes) && page.footer_notes.length > 0) {
        pageObj.footer_notes = page.footer_notes;
      }
      if (Object.keys(cleanedPageFields).length > 0) {
        pageObj.fields = cleanedPageFields;
      }

      output[`page_${pageNum}`] = pageObj;
    });

    delete output.pages;
  }

  // Strip Fields / metadata from page_* objects. `output` is only a shallow
  // copy, so these objects may still be the caller's own; copy each one before
  // deleting so the input is left intact.
  for (const pageKey of Object.keys(output)) {
    if (!pageKey.startsWith("page_")) continue;
    const pageData = output[pageKey];
    if (!pageData || typeof pageData !== "object" || Array.isArray(pageData)) continue;

    const pageCopy = { ...pageData };
    delete pageCopy.Fields;
    delete pageCopy.metadata;
    output[pageKey] = pageCopy;
  }

  // Rebuild with Fields first (only when it is a non-empty object).
  const finalOutput = {};
  if (rootFields && typeof rootFields === "object" && Object.keys(rootFields).length > 0) {
    finalOutput.Fields = rootFields;
  }
  Object.keys(output).forEach((key) => {
    finalOutput[key] = output[key];
  });
  return finalOutput;
}
/**
 * Escape the five XML special characters so a value can be placed in element
 * content or an attribute.
 *
 * The ampersand is replaced first; doing it later would re-escape the
 * ampersands introduced by the other entities.
 *
 * @param {*} str - Value to escape; null/undefined become an empty string and
 *   other non-strings are converted with String().
 * @returns {string} The escaped text.
 */
function escapeXML(str) {
  const text = str == null ? "" : String(str);
  return text
    .replace(/&/g, "&amp;")
    .replace(/</g, "&lt;")
    .replace(/>/g, "&gt;")
    .replace(/"/g, "&quot;")
    .replace(/'/g, "&apos;");
}
/**
 * Serialize an extraction object to an XML document string.
 *
 * The object is first passed through prepareFieldsForOutput(obj, "xml"), then
 * walked recursively: nested objects become nested elements, each array item
 * becomes its own element named after the array's key, and primitives become
 * escaped text content.
 *
 * null / undefined values are skipped, both as property values and as array
 * items. Keys that are not usable as XML element names (spaces, leading
 * digits, punctuation) are rewritten with underscores so the document stays
 * well-formed; keys that are already valid names are emitted unchanged.
 *
 * @param {*} obj - Data to serialize; a non-object yields an empty root element.
 * @param {string} [rootName="extraction"] - Root element name, used verbatim.
 * @returns {string} The XML document.
 */
function objectToXML(obj, rootName = "extraction") {
  const preparedObj = prepareFieldsForOutput(obj, "xml");
  let xml = `<?xml version="1.0" encoding="UTF-8"?>\n<${rootName}>\n`;

  // Map an arbitrary key to a safe element name: disallowed characters become
  // "_" and a name that does not start with a letter/underscore gets a "_" prefix.
  const toTagName = (key) => {
    const cleaned = String(key).replace(/[^\p{L}\p{N}_.\-]/gu, "_");
    return /^[\p{L}_]/u.test(cleaned) ? cleaned : `_${cleaned}`;
  };

  const convert = (node, indent = " ") => {
    for (const [key, value] of Object.entries(node)) {
      if (value === null || value === undefined) continue;
      // full_text is redundant whenever a non-empty pages array sits beside it.
      if (key === "full_text" && node.pages && Array.isArray(node.pages) && node.pages.length > 0) {
        continue;
      }

      const tag = toTagName(key);
      if (Array.isArray(value)) {
        for (const item of value) {
          // typeof null === "object": skip it here instead of recursing into it.
          if (item === null || item === undefined) continue;
          xml += `${indent}<${tag}>\n`;
          if (typeof item === "object") {
            convert(item, indent + " ");
          } else {
            xml += `${indent} ${escapeXML(String(item))}\n`;
          }
          xml += `${indent}</${tag}>\n`;
        }
      } else if (typeof value === "object") {
        xml += `${indent}<${tag}>\n`;
        convert(value, indent + " ");
        xml += `${indent}</${tag}>\n`;
      } else {
        xml += `${indent}<${tag}>${escapeXML(String(value))}</${tag}>\n`;
      }
    }
  };

  // prepareFieldsForOutput returns non-objects untouched; nothing to walk then.
  if (preparedObj && typeof preparedObj === "object") {
    convert(preparedObj);
  }
  xml += `</${rootName}>`;
  return xml;
}
export default function ExportButtons({ isComplete, extractionResult }) {
// Format currently being exported ("docx" | "json" | "xml"), or null when idle.
const [downloading, setDownloading] = useState(null);
// NOTE(review): neither `copied` nor `setCopied` is referenced anywhere else in this component.
const [copied, setCopied] = useState(false);
// Open/closed flag passed to <ShareModal>.
const [isShareModalOpen, setIsShareModalOpen] = useState(false);
// Open/closed flag passed to <ShareLinkModal>.
const [isShareLinkModalOpen, setIsShareLinkModalOpen] = useState(false);
// Link returned by createShareLink; reset to "" on request start, on failure, and on modal close.
const [shareLink, setShareLink] = useState("");
// True while the createShareLink request is in flight.
const [isGeneratingLink, setIsGeneratingLink] = useState(false);
// Helper function to extract text from fields (same as in ExtractionOutput)
/**
 * Pull the display text out of an extraction `fields` object.
 *
 * Every key starting with "page_" is visited in key order and the truthy
 * `text` values are joined with a blank line. When no page contributes any
 * text, `full_text` is returned if it is truthy; otherwise the result is "".
 *
 * @param {*} fields - Extraction fields; non-objects yield "".
 * @returns {*} The joined page text, `fields.full_text`, or "".
 */
const extractTextFromFields = (fields) => {
  const isObjectLike = Boolean(fields) && typeof fields === "object";
  if (!isObjectLike) return "";

  const pageTexts = [];
  for (const name of Object.keys(fields)) {
    if (!name.startsWith("page_")) continue;
    const entry = fields[name];
    const entryText = entry && entry.text ? entry.text : "";
    if (entryText) pageTexts.push(entryText);
  }

  if (pageTexts.length > 0) {
    return pageTexts.join("\n\n");
  }

  // No page supplied text: fall back to the flat full_text field.
  return fields.full_text ? fields.full_text : "";
};
// Helper function to escape HTML
/**
 * HTML-escape a value by assigning it as the text content of a detached
 * <div> and reading the element's serialized markup back.
 *
 * @param {*} text - Value to escape; any falsy value yields ''.
 * @returns {string} The escaped markup.
 */
const escapeHtml = (text) => {
  if (!text) {
    return '';
  }
  const scratch = Object.assign(document.createElement('div'), { textContent: text });
  return scratch.innerHTML;
};
// Helper function to convert pipe-separated tables to HTML tables
/**
 * Replace pipe-delimited tables inside `text` with HTML <table> markup.
 *
 * A table starts on a line that contains at least two pipes, is not a
 * separator row (only pipes, dashes, colons, whitespace) and yields at least
 * two cells. One separator row directly after the header is skipped. Following
 * lines are consumed as data rows until a blank line or a line without a pipe;
 * separator rows in between are dropped. All other lines are passed through
 * unchanged.
 *
 * Cell text is trimmed and run through escapeHtml. Rows with fewer cells than
 * the header are padded with empty cells so every row lines up with the header
 * columns (a trailing empty cell such as "a | b | " would otherwise be lost);
 * rows with more cells are kept as they are.
 *
 * @param {string} text - Source text; falsy input is returned unchanged.
 * @returns {string} Text with each detected table replaced by HTML.
 */
const convertPipeTablesToHTML = (text) => {
  if (!text) return text;

  const isSeparatorRow = (line) => /^[\s|\-:]+$/.test(line.trim());

  // Split a row into trimmed cells, dropping the empty cell that a leading or
  // trailing pipe produces.
  const splitCells = (line) => {
    const cells = line.split('|').map((cell) => cell.trim());
    if (cells.length > 0 && !cells[0]) cells.shift();
    if (cells.length > 0 && !cells[cells.length - 1]) cells.pop();
    return cells;
  };

  const lines = text.split('\n');
  const result = [];
  let i = 0;

  while (i < lines.length) {
    const line = lines[i];
    const looksLikeHeader =
      line.includes('|') && line.split('|').length >= 3 && !isSeparatorRow(line);
    const headerCells = looksLikeHeader ? splitCells(line) : [];

    if (headerCells.length < 2) {
      // Not the start of a table: keep the line as-is.
      result.push(line);
      i++;
      continue;
    }

    let j = i + 1;
    // Skip the header/body separator when present.
    if (j < lines.length && isSeparatorRow(lines[j])) {
      j++;
    }

    const bodyRows = [];
    while (j < lines.length) {
      const rowLine = lines[j];
      // A blank line or a line without any pipe ends the table.
      if (!rowLine.trim() || !rowLine.includes('|')) break;
      if (!isSeparatorRow(rowLine)) {
        const cells = splitCells(rowLine);
        while (cells.length < headerCells.length) {
          cells.push('');
        }
        bodyRows.push(cells);
      }
      j++;
    }

    const headerHtml = headerCells
      .map(
        (cell) =>
          `<th class="border border-gray-300 px-4 py-2 bg-gray-100 font-semibold text-left">${escapeHtml(cell)}</th>`,
      )
      .join('');
    const bodyHtml = bodyRows
      .map((cells) => {
        const cellHtml = cells
          .map((cell) => `<td class="border border-gray-300 px-4 py-2">${escapeHtml(cell)}</td>`)
          .join('');
        return `<tr>${cellHtml}</tr>\n`;
      })
      .join('');

    result.push(
      `<table class="border-collapse border border-gray-300 w-full my-4">\n<thead>\n<tr>${headerHtml}</tr>\n</thead>\n<tbody>\n${bodyHtml}</tbody>\n</table>`,
    );
    i = j;
  }

  return result.join('\n');
};
// Helper function to render markdown to HTML (same as in ExtractionOutput)
/**
 * Convert the lightweight markdown found in extracted text into HTML.
 *
 * Steps, in order:
 *  1. pipe tables -> <table> markup (convertPipeTablesToHTML);
 *  2. LaTeX-style `$^{x}$` / `$_{x}$` -> <sup> / <sub>;
 *  3. every <table>...</table> block is swapped for a placeholder so the
 *     inline rules below cannot alter it;
 *  4. `#`, `##`, `###` headers, `**bold**`, `*italic*` and `[text](url)` links;
 *  5. outside table blocks, blank-line runs become paragraph breaks and single
 *     newlines become <br>; a fragment that does not already start with a tag
 *     is wrapped in <p>;
 *  6. empty paragraphs are removed.
 *
 * The input is not HTML-escaped, so markup already present in `text` is
 * carried into the result.
 *
 * @param {string} text - Source text; falsy input yields "".
 * @returns {string} The HTML fragment.
 */
const renderMarkdownToHTML = (text) => {
  if (!text) return "";

  // Tables first, so their pipes and dashes are not touched by later rules.
  let html = convertPipeTablesToHTML(text);

  // LaTeX-style superscripts / subscripts.
  html = html.replace(/\$\s*\^\s*\{([^}]+)\}\s*\$/g, '<sup>$1</sup>');
  html = html.replace(/\$\s*\^\s*([^\s$<>]+)\s*\$/g, '<sup>$1</sup>');
  html = html.replace(/\$\s*_\s*\{([^}]+)\}\s*\$/g, '<sub>$1</sub>');
  html = html.replace(/\$\s*_\s*([^\s$<>]+)\s*\$/g, '<sub>$1</sub>');

  // Park table blocks behind placeholders while inline markdown is processed.
  const htmlBlocks = [];
  html = html.replace(/<table[\s\S]*?<\/table>/gi, (match) => {
    htmlBlocks.push(match);
    return `__HTML_BLOCK_${htmlBlocks.length - 1}__`;
  });

  // Headers (longest marker first so "###" is not read as "#").
  html = html.replace(/^### (.*$)/gim, '<h3>$1</h3>');
  html = html.replace(/^## (.*$)/gim, '<h2>$1</h2>');
  html = html.replace(/^# (.*$)/gim, '<h1>$1</h1>');

  // Bold before italic so "**" is not consumed as two italics.
  html = html.replace(/\*\*(.*?)\*\*/g, '<strong>$1</strong>');
  html = html.replace(/\*(.*?)\*/g, '<em>$1</em>');

  // Links.
  html = html.replace(/\[([^\]]+)\]\(([^)]+)\)/g, '<a href="$2">$1</a>');

  const processedParts = html.split(/(__HTML_BLOCK_\d+__)/).map((part) => {
    const placeholder = part.match(/^__HTML_BLOCK_(\d+)__$/);
    if (placeholder) {
      // Text that merely looks like a placeholder has no stored block; keep it verbatim.
      return htmlBlocks[Number.parseInt(placeholder[1], 10)] ?? part;
    }

    let processed = part.replace(/\n\n+/g, '</p><p>');
    // The character after the newline is only looked at, not consumed, so the
    // newline following a one-character line is converted as well.
    processed = processed.replace(/([^\n>])\n(?=[^\n<])/g, '$1<br>');
    if (processed.trim() && !processed.trim().startsWith('<')) {
      processed = '<p>' + processed + '</p>';
    }
    return processed;
  });

  html = processedParts.join('');
  html = html.replace(/<p><\/p>/g, '');
  html = html.replace(/<p>\s*<br>\s*<\/p>/g, '');
  html = html.replace(/<p>\s*<\/p>/g, '');
  return html;
};
/**
 * Build the export file for `format` from extractionResult.fields and trigger
 * a browser download of it.
 *
 *  - "json": prepareFieldsForOutput(...) pretty-printed with 2-space indent.
 *  - "xml":  objectToXML(...).
 *  - "docx": the extracted text rendered to HTML and wrapped in an HTML
 *            document carrying Word-specific meta tags and print styles; it is
 *            saved with a ".doc" name and the application/msword type.
 *
 * `downloading` is set to the format for the duration of the call and always
 * reset to null afterwards. Failures, including an unsupported format, are
 * logged with console.error and not rethrown.
 *
 * @param {string} format - "json", "xml" or "docx".
 * @returns {Promise<void>}
 */
const handleDownload = async (format) => {
  if (!extractionResult || !extractionResult.fields) {
    console.error("No extraction data available");
    return;
  }

  setDownloading(format);
  try {
    const fields = extractionResult.fields;
    // Date part (YYYY-MM-DD) of the current UTC timestamp, used in the file name.
    const dateStamp = new Date().toISOString().split('T')[0];
    let content = "";
    let filename = "";
    let mimeType = "";

    if (format === "json") {
      const preparedFields = prepareFieldsForOutput(fields, "json");
      content = JSON.stringify(preparedFields, null, 2);
      filename = `extraction_${dateStamp}.json`;
      mimeType = "application/json";
    } else if (format === "xml") {
      content = objectToXML(fields);
      filename = `extraction_${dateStamp}.xml`;
      mimeType = "application/xml";
    } else if (format === "docx") {
      // Render the extracted text to HTML and embed it in a Word-flavoured HTML document.
      const textContent = extractTextFromFields(fields);
      const htmlContent = renderMarkdownToHTML(textContent);
      const wordHTML = `<!DOCTYPE html>
<html xmlns:o="urn:schemas-microsoft-com:office:office" xmlns:w="urn:schemas-microsoft-com:office:word" xmlns="http://www.w3.org/TR/REC-html40">
<head>
<meta charset="UTF-8">
<meta name="ProgId" content="Word.Document">
<meta name="Generator" content="Microsoft Word">
<meta name="Originator" content="Microsoft Word">
<!--[if gte mso 9]><xml>
<w:WordDocument>
<w:View>Print</w:View>
<w:Zoom>100</w:Zoom>
<w:DoNotOptimizeForBrowser/>
</w:WordDocument>
</xml><![endif]-->
<title>Document Extraction</title>
<style>
@page {
size: 8.5in 11in;
margin: 1in;
}
body {
font-family: 'Calibri', 'Arial', sans-serif;
font-size: 11pt;
line-height: 1.6;
margin: 0;
color: #333;
}
h1 {
font-size: 18pt;
font-weight: bold;
color: #0f172a;
margin-top: 24pt;
margin-bottom: 12pt;
page-break-after: avoid;
}
h2 {
font-size: 16pt;
font-weight: 600;
color: #0f172a;
margin-top: 20pt;
margin-bottom: 10pt;
page-break-after: avoid;
}
h3 {
font-size: 14pt;
font-weight: 600;
color: #1e293b;
margin-top: 16pt;
margin-bottom: 8pt;
page-break-after: avoid;
}
p {
margin-top: 6pt;
margin-bottom: 6pt;
}
table {
width: 100%;
border-collapse: collapse;
margin: 12pt 0;
font-size: 10pt;
page-break-inside: avoid;
}
table th {
background-color: #f8fafc;
border: 1pt solid #cbd5e1;
padding: 6pt;
text-align: left;
font-weight: 600;
color: #0f172a;
}
table td {
border: 1pt solid #cbd5e1;
padding: 6pt;
color: #334155;
}
table tr:nth-child(even) {
background-color: #f8fafc;
}
sup {
font-size: 0.75em;
vertical-align: super;
line-height: 0;
}
sub {
font-size: 0.75em;
vertical-align: sub;
line-height: 0;
}
strong {
font-weight: 600;
}
em {
font-style: italic;
}
a {
color: #4f46e5;
text-decoration: underline;
}
</style>
</head>
<body>
${htmlContent}
</body>
</html>`;
      content = wordHTML;
      filename = `extraction_${dateStamp}.doc`;
      mimeType = "application/msword";
    } else {
      // Without this, an unknown format would download an empty, unnamed file.
      throw new Error(`Unsupported export format: ${format}`);
    }

    // Hand the content to the browser through a temporary object URL.
    const blob = new Blob([content], { type: mimeType });
    const url = URL.createObjectURL(blob);
    try {
      const link = document.createElement("a");
      link.href = url;
      link.download = filename;
      document.body.appendChild(link);
      link.click();
      document.body.removeChild(link);
    } finally {
      // Release the object URL even if the click path throws.
      URL.revokeObjectURL(url);
    }
  } catch (error) {
    console.error("Download error:", error);
  } finally {
    setDownloading(null);
  }
};
/**
 * Open the share-link modal and request a share link for the current
 * extraction.
 *
 * Does nothing when extractionResult has no id. Otherwise the modal is opened
 * in its loading state with an empty link, createShareLink is awaited, and the
 * returned `share_link` is stored when the response reports success. Any
 * failure is logged and leaves the link empty; the modal stays open. The
 * loading flag is cleared in every case.
 *
 * @returns {Promise<void>}
 */
const handleCopyLink = async () => {
  const extractionId = extractionResult?.id;
  if (!extractionId) {
    return;
  }

  setIsGeneratingLink(true);
  setIsShareLinkModalOpen(true);
  setShareLink("");

  try {
    const response = await createShareLink(extractionId);
    const hasLink = response.success && response.share_link;
    if (!hasLink) {
      throw new Error("Failed to generate share link");
    }
    setShareLink(response.share_link);
  } catch (err) {
    console.error("Failed to create share link:", err);
    // The modal remains open; an empty link is its error state.
    setShareLink("");
  } finally {
    setIsGeneratingLink(false);
  }
};
/**
 * Share an extraction with a recipient through the shareExtraction API call.
 * The call's result is discarded; a rejection propagates to the caller.
 *
 * @param {*} extractionId - Passed through to shareExtraction.
 * @param {*} recipientEmail - Passed through to shareExtraction.
 * @returns {Promise<void>}
 */
const handleShare = async (extractionId, recipientEmail) => {
  const pendingShare = shareExtraction(extractionId, recipientEmail);
  await pendingShare;
};
if (!isComplete) return null;
return (
<motion.div
initial={{ opacity: 0, y: 20 }}
animate={{ opacity: 1, y: 0 }}
className="flex items-center gap-3"
>
{/* Export Options Dropdown */}
<DropdownMenu>
<DropdownMenuTrigger asChild>
<Button
variant="ghost"
className="h-11 w-11 rounded-xl hover:bg-slate-100"
disabled={downloading !== null}
>
{downloading ? (
<motion.div
animate={{ rotate: 360 }}
transition={{ duration: 1, repeat: Infinity, ease: "linear" }}
>
<Download className="h-4 w-4" />
</motion.div>
) : (
<Share2 className="h-4 w-4" />
)}
</Button>
</DropdownMenuTrigger>
<DropdownMenuContent align="end" className="w-56 rounded-xl p-2">
<DropdownMenuItem
className="rounded-lg cursor-pointer"
onClick={() => setIsShareModalOpen(true)}
>
<Mail className="h-4 w-4 mr-2 text-indigo-600" />
Share output
</DropdownMenuItem>
<DropdownMenuItem
className="rounded-lg cursor-pointer"
onClick={handleCopyLink}
>
<Link2 className="h-4 w-4 mr-2 text-indigo-600" />
Copy share link
</DropdownMenuItem>
<DropdownMenuSeparator />
<DropdownMenuItem
className="rounded-lg cursor-pointer"
onClick={() => handleDownload("docx")}
disabled={downloading === "docx"}
>
{downloading === "docx" ? (
<motion.div
animate={{ rotate: 360 }}
transition={{ duration: 1, repeat: Infinity, ease: "linear" }}
className="h-4 w-4 mr-2"
>
<Download className="h-4 w-4" />
</motion.div>
) : (
<FileText className="h-4 w-4 mr-2 text-blue-600" />
)}
Download Docx
</DropdownMenuItem>
<DropdownMenuItem
className="rounded-lg cursor-pointer"
onClick={() => handleDownload("json")}
disabled={downloading === "json"}
>
{downloading === "json" ? (
<motion.div
animate={{ rotate: 360 }}
transition={{ duration: 1, repeat: Infinity, ease: "linear" }}
className="h-4 w-4 mr-2"
>
<Download className="h-4 w-4" />
</motion.div>
) : (
<Braces className="h-4 w-4 mr-2 text-indigo-600" />
)}
Download JSON
</DropdownMenuItem>
<DropdownMenuItem
className="rounded-lg cursor-pointer"
onClick={() => handleDownload("xml")}
disabled={downloading === "xml"}
>
{downloading === "xml" ? (
<motion.div
animate={{ rotate: 360 }}
transition={{ duration: 1, repeat: Infinity, ease: "linear" }}
className="h-4 w-4 mr-2"
>
<Download className="h-4 w-4" />
</motion.div>
) : (
<FileCode2 className="h-4 w-4 mr-2 text-slate-600" />
)}
Download XML
</DropdownMenuItem>
</DropdownMenuContent>
</DropdownMenu>
{/* Share Modal */}
<ShareModal
isOpen={isShareModalOpen}
onClose={() => setIsShareModalOpen(false)}
onShare={handleShare}
extractionId={extractionResult?.id}
/>
{/* Share Link Modal */}
<ShareLinkModal
isOpen={isShareLinkModalOpen}
onClose={() => {
setIsShareLinkModalOpen(false);
setShareLink("");
}}
shareLink={shareLink}
isLoading={isGeneratingLink}
/>
</motion.div>
);
}