EZOFISOCR

Running

EZOFISOCR / frontend /src /components /ExportButtons.jsx

Seth

update

89a3828 about 2 months ago

22.4 kB

	import React, { useState } from "react";
	import { motion, AnimatePresence } from "framer-motion";
	import {
	Download,
	Braces,
	FileCode2,
	Check,
	Share2,
	FileText,
	Link2,
	Mail,
	} from "lucide-react";
	import { Button } from "@/components/ui/button";
	import {
	DropdownMenu,
	DropdownMenuContent,
	DropdownMenuItem,
	DropdownMenuSeparator,
	DropdownMenuTrigger,
	} from "@/components/ui/dropdown-menu";
	import { cn } from "@/lib/utils";
	import ShareModal from "@/components/ShareModal";
	import ShareLinkModal from "@/components/ShareLinkModal";
	import { shareExtraction, createShareLink } from "@/services/api";

	// Helper functions from ExtractionOutput
	/**
	 * Build a cleaned, export-ready copy of an extraction `fields` object.
	 *
	 * - A non-empty root `Fields` object is moved to the front of the result.
	 * - When a non-empty `pages` array exists, the top-level `full_text` and each
	 *   page's `fields.full_text` are dropped (they duplicate the page text).
	 * - For "json" and "xml", the `pages` array is replaced by `page_<n>` keys
	 *   holding `text`, `confidence`, `doc_type`, optional `table` /
	 *   `footer_notes`, and only those page fields that differ from the
	 *   same-named top-level value.
	 * - `Fields` and `metadata` are stripped from every `page_*` object.
	 *
	 * The input is never mutated: page objects are copied before keys are removed.
	 *
	 * @param {*} fields - Extraction fields; non-object values are returned as-is.
	 * @param {string} [format="json"] - Target format; "json" and "xml" trigger the page_<n> restructuring.
	 * @returns {*} A new plain object, or `fields` itself when it is not an object.
	 */
	function prepareFieldsForOutput(fields, format = "json") {
	  if (!fields || typeof fields !== "object") {
	    return fields;
	  }

	  // Shallow copy without the root Fields entry; it is re-added first at the end.
	  const { Fields: rootFields, ...output } = fields;

	  if (Array.isArray(output.pages) && output.pages.length > 0) {
	    // Page text already carries the content, so the merged full_text is redundant.
	    delete output.full_text;

	    output.pages = output.pages.map((page) => {
	      const cleanedPage = { ...page };
	      if (cleanedPage.fields && typeof cleanedPage.fields === "object") {
	        const cleanedFields = { ...cleanedPage.fields };
	        delete cleanedFields.full_text;
	        cleanedPage.fields = cleanedFields;
	      }
	      return cleanedPage;
	    });
	  }

	  if ((format === "json" || format === "xml") && Array.isArray(output.pages)) {
	    // Keys already present at the root; identical page-level copies are omitted.
	    const topLevelKeys = new Set(
	      Object.keys(output).filter((k) => k !== "pages" && k !== "full_text" && k !== "Fields")
	    );

	    output.pages.forEach((page, idx) => {
	      const pageNum = page.page_number || idx + 1;
	      const pageFields = page.fields || {};

	      const cleanedPageFields = {};
	      for (const [key, value] of Object.entries(pageFields)) {
	        const duplicatesRoot = topLevelKeys.has(key) && value === output[key];
	        if (key !== "full_text" && !duplicatesRoot) {
	          cleanedPageFields[key] = value;
	        }
	      }

	      const pageObj = {
	        text: page.text || "",
	        confidence: page.confidence || 0,
	        doc_type: page.doc_type || "other",
	      };

	      if (Array.isArray(page.table) && page.table.length > 0) {
	        pageObj.table = page.table;
	      }
	      if (Array.isArray(page.footer_notes) && page.footer_notes.length > 0) {
	        pageObj.footer_notes = page.footer_notes;
	      }
	      if (Object.keys(cleanedPageFields).length > 0) {
	        pageObj.fields = cleanedPageFields;
	      }

	      output[`page_${pageNum}`] = pageObj;
	    });

	    // The page_<n> keys now replace the array.
	    delete output.pages;
	  }

	  // Strip Fields/metadata from page_* objects. Work on a copy: page objects
	  // that came straight from the input are still shared with the caller.
	  for (const key of Object.keys(output)) {
	    if (!key.startsWith("page_")) continue;
	    const pageData = output[key];
	    if (pageData && typeof pageData === "object" && !Array.isArray(pageData)) {
	      const pageCopy = { ...pageData };
	      delete pageCopy.Fields;
	      delete pageCopy.metadata;
	      output[key] = pageCopy;
	    }
	  }

	  const hasRootFields =
	    rootFields && typeof rootFields === "object" && Object.keys(rootFields).length > 0;

	  return hasRootFields ? { Fields: rootFields, ...output } : { ...output };
	}

	/**
	 * Escape the five XML special characters so a value can be embedded safely
	 * in element content or attribute values.
	 *
	 * @param {*} str - Value to escape; coerced to a string first.
	 * @returns {string} The escaped text.
	 */
	function escapeXML(str) {
	  // "&" must be replaced first, otherwise the entities produced below
	  // would themselves be re-escaped.
	  return String(str)
	    .replace(/&/g, "&amp;")
	    .replace(/</g, "&lt;")
	    .replace(/>/g, "&gt;")
	    .replace(/"/g, "&quot;")
	    .replace(/'/g, "&apos;");
	}

	/**
	 * Serialize an extraction object to an XML document string.
	 *
	 * The object is first passed through `prepareFieldsForOutput(obj, "xml")`.
	 * Each key becomes an element; arrays repeat the element once per item;
	 * `null`/`undefined` values and array items are skipped. Text content is
	 * passed through `escapeXML`.
	 *
	 * @param {object} obj - Extraction fields to serialize.
	 * @param {string} [rootName="extraction"] - Name of the root element (used as given).
	 * @returns {string} XML text, starting with the XML declaration.
	 */
	function objectToXML(obj, rootName = "extraction") {
	  const preparedObj = prepareFieldsForOutput(obj, "xml");

	  const lines = [`<?xml version="1.0" encoding="UTF-8"?>`, `<${rootName}>`];

	  // Object keys are arbitrary strings (e.g. "Invoice No", "0"), which are not
	  // valid XML names: replace disallowed characters and force a legal first char.
	  const toTagName = (key) => {
	    const cleaned = String(key).replace(/[^\p{L}\p{N}_.-]/gu, "_");
	    return /^[\p{L}_]/u.test(cleaned) ? cleaned : `_${cleaned}`;
	  };

	  const convert = (node, indent) => {
	    for (const [key, value] of Object.entries(node)) {
	      if (value === null || value === undefined) continue;

	      // Skip full_text when sibling pages carry the text already.
	      if (key === "full_text" && Array.isArray(node.pages) && node.pages.length > 0) {
	        continue;
	      }

	      const tag = toTagName(key);

	      if (Array.isArray(value)) {
	        for (const item of value) {
	          // typeof null === "object", so null items must be filtered before recursing.
	          if (item === null || item === undefined) continue;
	          lines.push(`${indent}<${tag}>`);
	          if (typeof item === "object") {
	            convert(item, `${indent} `);
	          } else {
	            lines.push(`${indent} ${escapeXML(String(item))}`);
	          }
	          lines.push(`${indent}</${tag}>`);
	        }
	      } else if (typeof value === "object") {
	        lines.push(`${indent}<${tag}>`);
	        convert(value, `${indent} `);
	        lines.push(`${indent}</${tag}>`);
	      } else {
	        lines.push(`${indent}<${tag}>${escapeXML(String(value))}</${tag}>`);
	      }
	    }
	  };

	  // A non-object input yields an empty root element instead of throwing.
	  if (preparedObj && typeof preparedObj === "object") {
	    convert(preparedObj, " ");
	  }

	  lines.push(`</${rootName}>`);
	  return lines.join("\n");
	}

	/**
	 * Export/share menu for a finished extraction.
	 *
	 * Renders a dropdown offering e-mail sharing, share-link creation and
	 * DOCX/JSON/XML downloads, plus the two share modals. Renders nothing
	 * while `isComplete` is falsy.
	 *
	 * @param {object} props
	 * @param {boolean} props.isComplete - When falsy the component returns null.
	 * @param {object} [props.extractionResult] - Extraction data; this component reads its `fields` and `id` properties.
	 */
	export default function ExportButtons({ isComplete, extractionResult }) {
	// Format currently being downloaded ("docx" | "json" | "xml"), or null when idle.
	const [downloading, setDownloading] = useState(null);
	// NOTE(review): `copied` / `setCopied` are not referenced anywhere else in this component.
	const [copied, setCopied] = useState(false);
	// Visibility of the "share by e-mail" modal.
	const [isShareModalOpen, setIsShareModalOpen] = useState(false);
	// Visibility of the "share link" modal.
	const [isShareLinkModalOpen, setIsShareLinkModalOpen] = useState(false);
	// Generated share link; empty string while loading or after a failure.
	const [shareLink, setShareLink] = useState("");
	// True while the share-link request is in flight.
	const [isGeneratingLink, setIsGeneratingLink] = useState(false);

	// Returns the plain text of an extraction: the non-empty `text` of every
	// `page_*` entry joined by blank lines, otherwise `full_text`, otherwise "".
	const extractTextFromFields = (fields) => {
	  if (!fields || typeof fields !== "object") {
	    return "";
	  }

	  // page_X structure is preferred; entries without text are ignored.
	  const pageTexts = Object.keys(fields)
	    .filter((key) => key.startsWith("page_"))
	    .map((key) => fields[key]?.text)
	    .filter(Boolean);

	  if (pageTexts.length > 0) {
	    return pageTexts.join("\n\n");
	  }

	  // Fallback for results that only carry the merged text.
	  return fields.full_text || "";
	};

	// Escapes text for HTML by round-tripping it through a detached element:
	// assigning textContent and reading innerHTML back yields the escaped markup.
	const escapeHtml = (text) => {
	  if (!text) {
	    return '';
	  }

	  const scratch = document.createElement('div');
	  scratch.textContent = text;
	  return scratch.innerHTML;
	};

	// Converts pipe-separated ("| a | b |") table blocks inside `text` into HTML
	// <table> markup; every other line is passed through unchanged.
	// A table starts at a non-separator line with at least two pipes and two
	// cells, optionally followed by a separator line (only |, -, :, whitespace),
	// and runs until an empty line or a line without a pipe. Cell text is
	// escaped with escapeHtml. Falsy input is returned as-is.
	const convertPipeTablesToHTML = (text) => {
	  if (!text) return text;

	  const SEPARATOR_LINE = /^[\s|\-:]+$/;
	  const isSeparator = (line) => SEPARATOR_LINE.test(line.trim());

	  // Split a row into trimmed cells, dropping the empty cells produced by a
	  // leading and/or trailing pipe.
	  const splitCells = (line) => {
	    const cells = line.split('|').map((cell) => cell.trim());
	    if (cells.length > 0 && !cells[0]) cells.shift();
	    if (cells.length > 0 && !cells[cells.length - 1]) cells.pop();
	    return cells;
	  };

	  const lines = text.split('\n');
	  const result = [];
	  let i = 0;

	  while (i < lines.length) {
	    const line = lines[i];
	    // Two or more pipes means at least three segments.
	    const looksLikeRow = line.split('|').length >= 3;
	    const headerCells = looksLikeRow && !isSeparator(line) ? splitCells(line) : [];

	    if (headerCells.length < 2) {
	      // Not the start of a table: keep the line untouched.
	      result.push(line);
	      i++;
	      continue;
	    }

	    const columnCount = headerCells.length;
	    const bodyRows = [];
	    let j = i + 1;

	    // Skip the markdown header/body separator if present.
	    if (j < lines.length && isSeparator(lines[j])) {
	      j++;
	    }

	    while (j < lines.length) {
	      const rowLine = lines[j];
	      // An empty line or a line without any pipe ends the table.
	      if (!rowLine.trim() || !rowLine.includes('|')) break;

	      if (!isSeparator(rowLine)) {
	        const cells = splitCells(rowLine);
	        // Pad short rows so every row lines up with the header columns.
	        while (cells.length < columnCount) cells.push('');
	        bodyRows.push(cells);
	      }
	      j++;
	    }

	    const headerHtml = headerCells
	      .map((cell) => `<th class="border border-gray-300 px-4 py-2 bg-gray-100 font-semibold text-left">${escapeHtml(cell)}</th>`)
	      .join('');

	    let htmlTable = `<table class="border-collapse border border-gray-300 w-full my-4">\n<thead>\n<tr>${headerHtml}</tr>\n</thead>\n<tbody>\n`;

	    for (const row of bodyRows) {
	      const rowHtml = row
	        .map((cell) => `<td class="border border-gray-300 px-4 py-2">${escapeHtml(cell)}</td>`)
	        .join('');
	      htmlTable += `<tr>${rowHtml}</tr>\n`;
	    }

	    htmlTable += '</tbody>\n</table>';
	    result.push(htmlTable);
	    i = j;
	  }

	  return result.join('\n');
	};

	// Renders a small markdown subset to HTML: pipe tables (via
	// convertPipeTablesToHTML), LaTeX-style $^{..}$ / $_{..}$ super/subscripts,
	// #/##/### headers, **bold**, *italic*, [text](url) links, and paragraph /
	// line-break handling. Returns "" for falsy input.
	// NOTE(review): apart from table cells, the text is inserted as-is (existing
	// <table> markup is deliberately preserved) and link URLs are not validated,
	// so the output should not be injected into a live page without sanitizing.
	const renderMarkdownToHTML = (text) => {
	  if (!text) return "";

	  // Tables first, so their markup can be shielded from the steps below.
	  let html = convertPipeTablesToHTML(text);

	  // LaTeX-style superscripts/subscripts, braced form before the bare form.
	  html = html.replace(/\$\s*\^\s*\{([^}]+)\}\s*\$/g, '<sup>$1</sup>');
	  html = html.replace(/\$\s*\^\s*([^\s$<>]+)\s*\$/g, '<sup>$1</sup>');
	  html = html.replace(/\$\s*_\s*\{([^}]+)\}\s*\$/g, '<sub>$1</sub>');
	  html = html.replace(/\$\s*_\s*([^\s$<>]+)\s*\$/g, '<sub>$1</sub>');

	  // Swap every <table> block for a placeholder so markdown and line-break
	  // rewriting cannot alter it.
	  const htmlBlocks = [];
	  html = html.replace(/<table[\s\S]*?<\/table>/gi, (match) => {
	    const placeholder = `__HTML_BLOCK_${htmlBlocks.length}__`;
	    htmlBlocks.push(match);
	    return placeholder;
	  });

	  // Headers: most specific first so "###" is not consumed by the "#" rule.
	  html = html.replace(/^### (.*$)/gim, '<h3>$1</h3>');
	  html = html.replace(/^## (.*$)/gim, '<h2>$1</h2>');
	  html = html.replace(/^# (.*$)/gim, '<h1>$1</h1>');

	  // Bold before italic, otherwise "**x**" would be read as nested italics.
	  html = html.replace(/\*\*(.*?)\*\*/g, '<strong>$1</strong>');
	  html = html.replace(/\*(.*?)\*/g, '<em>$1</em>');

	  // Links: [label](url)
	  html = html.replace(/\[([^\]]+)\]\(([^)]+)\)/g, '<a href="$2">$1</a>');

	  // Paragraphs and line breaks for the text between protected blocks.
	  const processedParts = html.split(/(__HTML_BLOCK_\d+__)/).map((part) => {
	    const blockMatch = /^__HTML_BLOCK_(\d+)__$/.exec(part);
	    if (blockMatch) {
	      return htmlBlocks[Number(blockMatch[1])];
	    }

	    let processed = part.replace(/\n\n+/g, '</p><p>');
	    processed = processed.replace(/([^\n>])\n([^\n<])/g, '$1<br>$2');
	    if (processed.trim() && !processed.trim().startsWith('<')) {
	      processed = '<p>' + processed + '</p>';
	    }
	    return processed;
	  });

	  // Drop empty paragraphs left over from the rewriting above.
	  html = processedParts.join('');
	  html = html.replace(/<p><\/p>/g, '');
	  html = html.replace(/<p>\s*<br>\s*<\/p>/g, '');
	  html = html.replace(/<p>\s*<\/p>/g, '');

	  return html;
	};

	// Builds the requested export from extractionResult.fields and triggers a
	// browser download through a temporary object URL.
	//   "json" -> pretty-printed JSON of the prepared fields
	//   "xml"  -> XML document produced by objectToXML
	//   "docx" -> Word-compatible HTML saved with a .doc name (application/msword)
	// Missing data or an unknown format is logged and ignored. `downloading`
	// holds the format while the export runs and is always reset to null.
	const handleDownload = async (format) => {
	  if (!extractionResult?.fields) {
	    console.error("No extraction data available");
	    return;
	  }

	  // Reject unknown formats up front rather than downloading an empty,
	  // nameless file.
	  if (!["json", "xml", "docx"].includes(format)) {
	    console.error("Unsupported export format:", format);
	    return;
	  }

	  setDownloading(format);

	  try {
	    const { fields } = extractionResult;
	    const dateStamp = new Date().toISOString().split('T')[0];
	    let content = "";
	    let filename = "";
	    let mimeType = "";

	    if (format === "json") {
	      const preparedFields = prepareFieldsForOutput(fields, "json");
	      content = JSON.stringify(preparedFields, null, 2);
	      filename = `extraction_${dateStamp}.json`;
	      mimeType = "application/json";
	    } else if (format === "xml") {
	      content = objectToXML(fields);
	      filename = `extraction_${dateStamp}.xml`;
	      mimeType = "application/xml";
	    } else {
	      // "docx": extract the text and render it to HTML, then wrap it in a
	      // Word-flavoured HTML document.
	      const textContent = extractTextFromFields(fields);
	      const htmlContent = renderMarkdownToHTML(textContent);

	      // This is HTML, not a real OOXML package, so it is saved as .doc with
	      // the application/msword type.
	      const wordHTML = `<!DOCTYPE html>
	<html xmlns:o="urn:schemas-microsoft-com:office:office" xmlns:w="urn:schemas-microsoft-com:office:word" xmlns="http://www.w3.org/TR/REC-html40">
	<head>
	<meta charset="UTF-8">
	<meta name="ProgId" content="Word.Document">
	<meta name="Generator" content="Microsoft Word">
	<meta name="Originator" content="Microsoft Word">
	<!--[if gte mso 9]><xml>
	<w:WordDocument>
	<w:View>Print</w:View>
	<w:Zoom>100</w:Zoom>
	<w:DoNotOptimizeForBrowser/>
	</w:WordDocument>
	</xml><![endif]-->
	<title>Document Extraction</title>
	<style>
	@page {
	size: 8.5in 11in;
	margin: 1in;
	}
	body {
	font-family: 'Calibri', 'Arial', sans-serif;
	font-size: 11pt;
	line-height: 1.6;
	margin: 0;
	color: #333;
	}
	h1 {
	font-size: 18pt;
	font-weight: bold;
	color: #0f172a;
	margin-top: 24pt;
	margin-bottom: 12pt;
	page-break-after: avoid;
	}
	h2 {
	font-size: 16pt;
	font-weight: 600;
	color: #0f172a;
	margin-top: 20pt;
	margin-bottom: 10pt;
	page-break-after: avoid;
	}
	h3 {
	font-size: 14pt;
	font-weight: 600;
	color: #1e293b;
	margin-top: 16pt;
	margin-bottom: 8pt;
	page-break-after: avoid;
	}
	p {
	margin-top: 6pt;
	margin-bottom: 6pt;
	}
	table {
	width: 100%;
	border-collapse: collapse;
	margin: 12pt 0;
	font-size: 10pt;
	page-break-inside: avoid;
	}
	table th {
	background-color: #f8fafc;
	border: 1pt solid #cbd5e1;
	padding: 6pt;
	text-align: left;
	font-weight: 600;
	color: #0f172a;
	}
	table td {
	border: 1pt solid #cbd5e1;
	padding: 6pt;
	color: #334155;
	}
	table tr:nth-child(even) {
	background-color: #f8fafc;
	}
	sup {
	font-size: 0.75em;
	vertical-align: super;
	line-height: 0;
	}
	sub {
	font-size: 0.75em;
	vertical-align: sub;
	line-height: 0;
	}
	strong {
	font-weight: 600;
	}
	em {
	font-style: italic;
	}
	a {
	color: #4f46e5;
	text-decoration: underline;
	}
	</style>
	</head>
	<body>
	${htmlContent}
	</body>
	</html>`;

	      content = wordHTML;
	      filename = `extraction_${dateStamp}.doc`;
	      mimeType = "application/msword";
	    }

	    // Hand the content to the browser via a temporary <a download> element.
	    const blob = new Blob([content], { type: mimeType });
	    const url = URL.createObjectURL(blob);
	    const link = document.createElement("a");
	    link.href = url;
	    link.download = filename;
	    document.body.appendChild(link);
	    link.click();
	    document.body.removeChild(link);
	    URL.revokeObjectURL(url);
	  } catch (error) {
	    console.error("Download error:", error);
	  } finally {
	    // Always clear the spinner, whether the export succeeded or failed.
	    setDownloading(null);
	  }
	};

	// Opens the share-link modal in its loading state and requests a share link
	// for the current extraction. On failure the error is logged and the link is
	// left empty; the loading flag is cleared in every case.
	const handleCopyLink = async () => {
	  const extractionId = extractionResult?.id;
	  if (!extractionId) {
	    return;
	  }

	  setIsGeneratingLink(true);
	  setIsShareLinkModalOpen(true);
	  setShareLink("");

	  try {
	    const result = await createShareLink(extractionId);
	    const hasLink = result.success && result.share_link;
	    if (!hasLink) {
	      throw new Error("Failed to generate share link");
	    }
	    setShareLink(result.share_link);
	  } catch (err) {
	    // The modal stays open; an empty link is its error state.
	    console.error("Failed to create share link:", err);
	    setShareLink("");
	  } finally {
	    setIsGeneratingLink(false);
	  }
	};

	// Forwards a share request to the API; resolves with no value and lets any
	// rejection propagate to the caller.
	const handleShare = async (extractionId, recipientEmail) => {
	  const pendingShare = shareExtraction(extractionId, recipientEmail);
	  await pendingShare;
	};

	// Nothing is rendered until the extraction is reported complete.
	if (!isComplete) return null;

	// Layout: one dropdown trigger (with the share and download actions inside)
	// followed by the two modals, which stay mounted and are toggled via isOpen.
	return (
	<motion.div
	initial={{ opacity: 0, y: 20 }}
	animate={{ opacity: 1, y: 0 }}
	className="flex items-center gap-3"
	>
	{/* Export Options Dropdown */}
	<DropdownMenu>
	<DropdownMenuTrigger asChild>
	{/* Trigger is disabled, and shows a spinning icon, while any download is in progress */}
	<Button
	variant="ghost"
	className="h-11 w-11 rounded-xl hover:bg-slate-100"
	disabled={downloading !== null}
	>
	{downloading ? (
	<motion.div
	animate={{ rotate: 360 }}
	transition={{ duration: 1, repeat: Infinity, ease: "linear" }}
	>
	<Download className="h-4 w-4" />
	</motion.div>
	) : (
	<Share2 className="h-4 w-4" />
	)}
	</Button>
	</DropdownMenuTrigger>
	<DropdownMenuContent align="end" className="w-56 rounded-xl p-2">
	{/* Sharing actions */}
	<DropdownMenuItem
	className="rounded-lg cursor-pointer"
	onClick={() => setIsShareModalOpen(true)}
	>
	<Mail className="h-4 w-4 mr-2 text-indigo-600" />
	Share output
	</DropdownMenuItem>
	<DropdownMenuItem
	className="rounded-lg cursor-pointer"
	onClick={handleCopyLink}
	>
	<Link2 className="h-4 w-4 mr-2 text-indigo-600" />
	Copy share link
	</DropdownMenuItem>
	<DropdownMenuSeparator />
	{/* Download actions: each item is disabled and shows a spinner while its own format is downloading */}
	<DropdownMenuItem
	className="rounded-lg cursor-pointer"
	onClick={() => handleDownload("docx")}
	disabled={downloading === "docx"}
	>
	{downloading === "docx" ? (
	<motion.div
	animate={{ rotate: 360 }}
	transition={{ duration: 1, repeat: Infinity, ease: "linear" }}
	className="h-4 w-4 mr-2"
	>
	<Download className="h-4 w-4" />
	</motion.div>
	) : (
	<FileText className="h-4 w-4 mr-2 text-blue-600" />
	)}
	Download Docx
	</DropdownMenuItem>
	<DropdownMenuItem
	className="rounded-lg cursor-pointer"
	onClick={() => handleDownload("json")}
	disabled={downloading === "json"}
	>
	{downloading === "json" ? (
	<motion.div
	animate={{ rotate: 360 }}
	transition={{ duration: 1, repeat: Infinity, ease: "linear" }}
	className="h-4 w-4 mr-2"
	>
	<Download className="h-4 w-4" />
	</motion.div>
	) : (
	<Braces className="h-4 w-4 mr-2 text-indigo-600" />
	)}
	Download JSON
	</DropdownMenuItem>
	<DropdownMenuItem
	className="rounded-lg cursor-pointer"
	onClick={() => handleDownload("xml")}
	disabled={downloading === "xml"}
	>
	{downloading === "xml" ? (
	<motion.div
	animate={{ rotate: 360 }}
	transition={{ duration: 1, repeat: Infinity, ease: "linear" }}
	className="h-4 w-4 mr-2"
	>
	<Download className="h-4 w-4" />
	</motion.div>
	) : (
	<FileCode2 className="h-4 w-4 mr-2 text-slate-600" />
	)}
	Download XML
	</DropdownMenuItem>
	</DropdownMenuContent>
	</DropdownMenu>

	{/* Share Modal */}
	<ShareModal
	isOpen={isShareModalOpen}
	onClose={() => setIsShareModalOpen(false)}
	onShare={handleShare}
	extractionId={extractionResult?.id}
	/>

	{/* Share Link Modal: closing it also clears the generated link */}
	<ShareLinkModal
	isOpen={isShareLinkModalOpen}
	onClose={() => {
	setIsShareLinkModalOpen(false);
	setShareLink("");
	}}
	shareLink={shareLink}
	isLoading={isGeneratingLink}
	/>
	</motion.div>
	);
	}