import React, { useState, useEffect } from "react";
import { motion, AnimatePresence } from "framer-motion";
import {
  Code2,
  Copy,
  Check,
  Braces,
  FileCode2,
  FileText,
  Sparkles,
  ChevronDown,
} from "lucide-react";
import { Button } from "@/components/ui/button";
import { Tabs, TabsList, TabsTrigger } from "@/components/ui/tabs";
import { cn } from "@/lib/utils";

// Mock extracted data: a sample invoice in the nested shape the viewer renders.
const mockData = {
  document: {
    type: "Invoice",
    confidence: 0.98,
  },
  vendor: {
    name: "Acme Corporation",
    address: "123 Business Ave, Suite 400",
    city: "San Francisco",
    state: "CA",
    zip: "94102",
    phone: "+1 (555) 123-4567",
  },
  invoice: {
    number: "INV-2024-0847",
    date: "2024-01-15",
    due_date: "2024-02-14",
    po_number: "PO-9823",
  },
  items: [
    {
      description: "Professional Services",
      quantity: 40,
      unit_price: 150.0,
      total: 6000.0,
    },
    {
      description: "Software License",
      quantity: 5,
      unit_price: 299.99,
      total: 1499.95,
    },
    {
      description: "Support Package",
      quantity: 1,
      unit_price: 500.0,
      total: 500.0,
    },
  ],
  totals: {
    subtotal: 7999.95,
    tax_rate: 0.0875,
    tax_amount: 699.99,
    total: 8699.94,
  },
};

// Sample of the same invoice for the XML view.
// NOTE(review): this literal contains element text only and no tags; the markup
// looks like it was stripped before this copy was made - confirm against upstream.
const mockXML = ` Acme Corporation
123 Business Ave, Suite 400
San Francisco CA 94102
INV-2024-0847 2024-01-15 2024-02-14 Professional Services 40 6000.00 7999.95 699.99 8699.94
`;
// Sample plain-text rendering of the mock invoice.
// NOTE(review): the layout whitespace appears to have been collapsed to single
// spaces in this copy; the string is kept exactly as found.
const mockText = `INVOICE ACME CORPORATION 123 Business Ave, Suite 400 San Francisco, CA 94102 Phone: +1 (555) 123-4567 Invoice Number: INV-2024-0847 Invoice Date: January 15, 2024 Due Date: February 14, 2024 PO Number: PO-9823 BILL TO: Customer Name 456 Client Street New York, NY 10001 ITEMS: ───────────────────────────────────────────────────────── Description Qty Unit Price Total ───────────────────────────────────────────────────────── Professional Services 40 $150.00 $6,000.00 Software License 5 $299.99 $1,499.95 Support Package 1 $500.00 $500.00 ───────────────────────────────────────────────────────── Subtotal: $7,999.95 Tax (8.75%): $699.99 ───────────────────────── TOTAL: $8,699.94 Payment Terms: Net 30 Thank you for your business!`;

/**
 * Structural equality for JSON-like values (primitives, arrays, plain objects).
 *
 * Used to decide whether a page-level field merely repeats a top-level field.
 * Identity comparison is not enough there: equal objects/arrays coming from
 * separate parts of a payload are distinct references.
 *
 * @param {*} left
 * @param {*} right
 * @returns {boolean} true when both values have the same shape and contents.
 */
function isSameFieldValue(left, right) {
  if (left === right) {
    return true;
  }
  if (
    left === null ||
    right === null ||
    typeof left !== "object" ||
    typeof right !== "object"
  ) {
    return false;
  }
  if (Array.isArray(left) !== Array.isArray(right)) {
    return false;
  }
  const leftKeys = Object.keys(left);
  if (leftKeys.length !== Object.keys(right).length) {
    return false;
  }
  return leftKeys.every(
    (key) =>
      Object.prototype.hasOwnProperty.call(right, key) &&
      isSameFieldValue(left[key], right[key])
  );
}

/**
 * Prepare extracted fields for output - remove duplicates and restructure.
 *
 * - When a non-empty `pages` array exists, the top-level `full_text` and each
 *   page's `fields.full_text` are dropped (they duplicate the page text).
 * - For the "json" and "xml" formats the `pages` array is replaced by
 *   `page_1`, `page_2`, ... entries of the form
 *   `{ text, confidence, doc_type, fields? }`; `fields` only keeps entries that
 *   are not an exact (structural) repeat of a top-level field.
 * - Any other format keeps the cleaned `pages` array.
 *
 * The input is never mutated; a shallow-copied result is returned.
 *
 * @param {*} fields Extracted fields; non-object values are returned unchanged.
 * @param {string} [format="json"] Target format ("json", "xml", or other).
 * @returns {*} The prepared copy, or `fields` itself when it is not an object.
 */
function prepareFieldsForOutput(fields, format = "json") {
  if (!fields || typeof fields !== "object") {
    return fields;
  }

  const output = { ...fields };
  const hasPagesArray = Array.isArray(output.pages);

  // Remove full_text from top-level if pages exist (to avoid duplication).
  if (hasPagesArray && output.pages.length > 0) {
    delete output.full_text;

    // Copy each page so the caller's objects are left untouched.
    output.pages = output.pages.map((page) => {
      const cleanedPage = { ...page };
      if (cleanedPage.fields && typeof cleanedPage.fields === "object") {
        const cleanedFields = { ...cleanedPage.fields };
        delete cleanedFields.full_text; // duplicates page.text
        cleanedPage.fields = cleanedFields;
      }
      return cleanedPage;
    });
  }

  const flattensPages = format === "json" || format === "xml";
  if (flattensPages && hasPagesArray) {
    // Keys already shown at the root; captured before page_N keys are added.
    const topLevelKeys = new Set(
      Object.keys(output).filter((key) => key !== "pages" && key !== "full_text")
    );

    output.pages.forEach((page, idx) => {
      const pageNum = page.page_number || idx + 1;
      const pageFields = page.fields || {};

      const cleanedPageFields = {};
      for (const [key, value] of Object.entries(pageFields)) {
        const repeatsTopLevel =
          topLevelKeys.has(key) && isSameFieldValue(value, output[key]);
        if (key !== "full_text" && !repeatsTopLevel) {
          cleanedPageFields[key] = value;
        }
      }

      const pageObj = {
        text: page.text || "",
        confidence: page.confidence || 0,
        doc_type: page.doc_type || "other",
      };

      // Only add fields if there are unique page-specific fields.
      if (Object.keys(cleanedPageFields).length > 0) {
        pageObj.fields = cleanedPageFields;
      }

      output[`page_${pageNum}`] = pageObj;
    });

    // The pages array is now represented by page_1, page_2, ... entries.
    delete output.pages;
  }

  return output;
}
/**
 * Turn an arbitrary object key into a usable XML element name.
 *
 * Characters outside letters, digits, `_`, `.` and `-` become `_`, and a
 * leading `_` is added when the name would not start with a letter or `_`.
 *
 * @param {*} name Key to convert (coerced to string).
 * @returns {string} A name safe to use between `<` and `>`.
 */
function toXMLElementName(name) {
  const cleaned = String(name).replace(/[^\w.-]/g, "_");
  return /^[A-Za-z_]/.test(cleaned) ? cleaned : `_${cleaned}`;
}

/**
 * Helper function to convert an object to an XML document string.
 *
 * The object is first passed through `prepareFieldsForOutput(obj, "xml")`
 * (defined elsewhere in this file). Arrays become one repeated element per
 * item, nested objects become nested elements, `null`/`undefined` properties
 * are skipped, and all text content is escaped.
 *
 * @param {*} obj Fields to serialise.
 * @param {string} [rootName="extraction"] Name of the root element.
 * @returns {string} XML text, lines joined by "\n", without trailing newline.
 */
function objectToXML(obj, rootName = "extraction") {
  const preparedObj = prepareFieldsForOutput(obj, "xml");
  const rootTag = toXMLElementName(rootName);
  const lines = ['<?xml version="1.0" encoding="UTF-8"?>', `<${rootTag}>`];

  const convert = (node, indent = " ") => {
    for (const [key, value] of Object.entries(node)) {
      if (value === null || value === undefined) {
        continue;
      }
      // Skip full_text if pages exist (normally already handled upstream).
      if (key === "full_text" && Array.isArray(node.pages) && node.pages.length > 0) {
        continue;
      }

      const tag = toXMLElementName(key);
      if (Array.isArray(value)) {
        for (const item of value) {
          lines.push(`${indent}<${tag}>`);
          if (item !== null && typeof item === "object") {
            convert(item, `${indent} `);
          } else if (item !== null && item !== undefined) {
            lines.push(`${indent} ${escapeXML(String(item))}`);
          }
          // null/undefined items are emitted as empty elements.
          lines.push(`${indent}</${tag}>`);
        }
      } else if (typeof value === "object") {
        lines.push(`${indent}<${tag}>`);
        convert(value, `${indent} `);
        lines.push(`${indent}</${tag}>`);
      } else {
        lines.push(`${indent}<${tag}>${escapeXML(String(value))}</${tag}>`);
      }
    }
  };

  // Non-object input yields an empty root element instead of throwing.
  if (preparedObj !== null && typeof preparedObj === "object") {
    convert(preparedObj);
  }

  lines.push(`</${rootTag}>`);
  return lines.join("\n");
}

/**
 * Escape the five characters that are special in XML text and attributes.
 *
 * `&` is replaced first so the entities produced for the other characters are
 * not escaped a second time.
 *
 * @param {*} str Text to escape (coerced to string).
 * @returns {string} The escaped text.
 */
function escapeXML(str) {
  return String(str)
    .replace(/&/g, "&amp;")
    .replace(/</g, "&lt;")
    .replace(/>/g, "&gt;")
    .replace(/"/g, "&quot;")
    .replace(/'/g, "&apos;");
}

/**
 * Format one key/value pair (recursively) as indented "key: value" lines.
 *
 * Arrays list primitives as "- item" and objects as "Item N:" followed by
 * their own entries; nested objects are indented one level deeper.
 *
 * @param {string} key
 * @param {*} value
 * @param {string} [indent=""] Prefix for every line produced at this level.
 * @returns {string} Text ending in a newline.
 */
function formatFieldAsText(key, value, indent = "") {
  const childIndent = `${indent} `;
  const formatEntries = (record) =>
    Object.entries(record)
      .map(([childKey, childValue]) => formatFieldAsText(childKey, childValue, childIndent))
      .join("");

  if (Array.isArray(value)) {
    const items = value.map((item, idx) =>
      // typeof null is "object": treat null items as plain values, not records.
      item !== null && typeof item === "object"
        ? `${indent} Item ${idx + 1}:\n${formatEntries(item)}`
        : `${indent} - ${item}\n`
    );
    return `${indent}${key}:\n${items.join("")}`;
  }

  if (value !== null && typeof value === "object") {
    return `${indent}${key}:\n${formatEntries(value)}`;
  }

  return `${indent}${key}: ${value}\n`;
}

/**
 * Helper function to format fields as readable text.
 *
 * When `full_text` is present it is shown first; a per-page breakdown follows
 * only if `full_text` does not already contain page markers ("=== PAGE" or
 * "--- Page"); remaining fields are listed under "STRUCTURED FIELDS".
 * Without `full_text`, all fields are listed directly.
 *
 * @param {*} fields Extracted fields.
 * @returns {string} Readable text, or "No data extracted." when nothing to show.
 */
function fieldsToText(fields) {
  if (!fields || typeof fields !== "object") {
    return "No data extracted.";
  }

  const formatAll = (record) =>
    Object.entries(record)
      .map(([key, value]) => `${formatFieldAsText(key, value)}\n`)
      .join("");

  // Fallback: no full text, format all fields normally.
  if (!fields.full_text) {
    return formatAll(fields).trim() || "No data extracted.";
  }

  const fullText = String(fields.full_text);
  let text = `=== FULL EXTRACTED TEXT ===\n\n${fullText}`;

  // Skip the page list when full_text already carries its own page breakdown.
  const hasPageMarkers = fullText.includes("=== PAGE") || fullText.includes("--- Page");
  if (!hasPageMarkers && Array.isArray(fields.pages)) {
    text += "\n\n=== TEXT BY PAGE ===\n\n";
    fields.pages.forEach((page, idx) => {
      text += `--- Page ${page?.page_number || idx + 1} ---\n`;
      text += page?.text || "";
      text += "\n\n";
    });
  }

  // Then show the other structured fields.
  const otherFields = { ...fields };
  delete otherFields.full_text;
  delete otherFields.pages;
  if (Object.keys(otherFields).length > 0) {
    text += `\n\n=== STRUCTURED FIELDS ===\n\n${formatAll(otherFields)}`;
  }

  return text.trim();
}
value); text += "\n"; }); return text.trim() || "No data extracted."; }
/**
 * ExtractionOutput - panel that shows an extraction result as Text, JSON or XML.
 *
 * Props:
 *  - hasFile: when falsy, the "Extracted data will appear here" placeholder is shown.
 *  - isProcessing: when truthy (and a file exists), the "Extracting data..." state is shown.
 *  - isComplete: enables the tab switcher, the field-count subtitle and the footer.
 *  - extractionResult: optional object; `fields`, `confidence`, `fieldsExtracted`
 *    and `totalTime` are read from it, falling back to `{}` / `0`.
 *
 * State:
 *  - activeTab: "json" (initial), "xml" or "text"; selects which serialisation is
 *    rendered and which one handleCopy writes to the clipboard.
 *  - copied: set to true after a copy and reset to false 2000 ms later.
 *  - expandedSections: list of expanded key names; starts as the first five keys
 *    of `fields` and is reset to the first five keys whenever `extractionResult`
 *    changes and has `fields`.
 *
 * Footer: confidence is printed with one decimal and a "%" suffix ("N/A" when it
 * is not > 0), with colour thresholds at 90 and 70; totalTime is printed in
 * seconds when >= 1000, otherwise with an "ms" suffix (presumably milliseconds -
 * confirm with the producer of extractionResult).
 *
 * NOTE(review): the JSX element tags appear to be missing from the render code
 * below (e.g. `return {value};`, bare text such as "Extracted Data"); this looks
 * like markup stripped during extraction - restore from the upstream file before
 * building.
 * NOTE(review): `fields` falls back to a fresh `{}` on every render, so the
 * `[fields]` dependency of useMemo changes each render when extractionResult
 * has no fields.
 * NOTE(review): handleCopy does not wait for or handle the result of
 * navigator.clipboard.writeText; `copied` is set to true regardless - confirm
 * this is intended.
 */
export default function ExtractionOutput({ hasFile, isProcessing, isComplete, extractionResult }) { const [activeTab, setActiveTab] = useState("json"); const [copied, setCopied] = useState(false); // Get fields from extraction result, default to empty object const fields = extractionResult?.fields || {}; const confidence = extractionResult?.confidence || 0; const fieldsExtracted = extractionResult?.fieldsExtracted || 0; const totalTime = extractionResult?.totalTime || 0; // Initialize expanded sections based on available fields const [expandedSections, setExpandedSections] = useState(() => Object.keys(fields).slice(0, 5) // Expand first 5 sections by default ); const handleCopy = () => { let content = ""; if (activeTab === "json") { const preparedFields = prepareFieldsForOutput(fields, "json"); content = JSON.stringify(preparedFields, null, 2); } else if (activeTab === "xml") { content = objectToXML(fields); } else { content = fieldsToText(fields); } navigator.clipboard.writeText(content); setCopied(true); setTimeout(() => setCopied(false), 2000); }; // Get prepared fields for display const preparedFields = React.useMemo(() => { return prepareFieldsForOutput(fields, "json"); }, [fields]); // Update expanded sections when fields change React.useEffect(() => { if (extractionResult?.fields) { setExpandedSections(Object.keys(extractionResult.fields).slice(0, 5)); } }, [extractionResult]); const toggleSection = (section) => { setExpandedSections((prev) => prev.includes(section) ? 
prev.filter((s) => s !== section) : [...prev, section] ); };
/* renderValue: chooses the display form of a leaf value by type (number, string, anything else via String()). */
const renderValue = (value) => { if (typeof value === "number") { return {value}; } if (typeof value === "string") { return "{value}"; } return String(value); };
/*
 * renderSection: renders one key of the JSON tree; recurses into objects and arrays.
 * Expansion state is looked up by key name only (expandedSections.includes(key)), so
 * equally named keys at different depths share one expanded/collapsed state.
 * NOTE(review): array items are passed to Object.entries(item), which presumably
 * expects object items - verify behaviour for arrays of primitives.
 */
const renderSection = (key, value, level = 0) => { const isExpanded = expandedSections.includes(key); const isObject = typeof value === "object" && value !== null; const isArray = Array.isArray(value); if (!isObject) { return (
"{key}" : {renderValue(value)}
); } return (
{isExpanded && ( {isArray ? ( value.map((item, idx) => (
{Object.entries(item).map(([k, v]) => renderSection(k, v, level + 2))} {idx < value.length - 1 &&
}
)) ) : ( Object.entries(value).map(([k, v]) => renderSection(k, v, level + 1)) )}
{isArray ? "]" : "}"}
)}
); }; return (
{/* Header */}

Extracted Data

{isComplete ? `${fieldsExtracted} field${fieldsExtracted !== 1 ? 's' : ''} extracted` : "Waiting for extraction"}

{isComplete && (
Text JSON XML
)}
{/* Output Area */}
{!hasFile ? (

Extracted data will appear here

) : isProcessing ? (

Extracting data...

Analyzing document structure

{[0, 1, 2].map((i) => ( ))}
) : isComplete && Object.keys(fields).length === 0 ? (

No data extracted

The document may not contain extractable fields

) : (
{activeTab === "text" ? (
                {fieldsToText(fields)}
              
) : activeTab === "json" ? (
{"{"} {Object.keys(preparedFields).length > 0 ? ( Object.entries(preparedFields).map(([key, value]) => renderSection(key, value, 1) ) ) : (
No fields extracted
)} {"}"}
) : (
                {objectToXML(fields).split("\n").map((line, i) => (
                  
{line.includes("<") ? ( <> {line.split(/(<\/?[\w\s=".-]+>)/g).map((part, j) => { if (part.startsWith(" {part} ); } if (part.startsWith("<")) { return ( {part} ); } return ( {part} ); })} ) : ( line )}
))}
)}
)}
{/* Confidence Footer */} {isComplete && extractionResult && (
= 90 ? "bg-emerald-500" : confidence >= 70 ? "bg-amber-500" : "bg-red-500" )} /> Confidence: {confidence > 0 ? `${confidence.toFixed(1)}%` : "N/A"}
Fields: {fieldsExtracted}
Processed in {totalTime >= 1000 ? `${(totalTime / 1000).toFixed(1)}s` : `${totalTime}ms`}
)}
); }