import React, { useState, useEffect } from "react";
import { motion, AnimatePresence } from "framer-motion";
import {
Code2,
Copy,
Check,
Braces,
FileCode2,
FileText,
Sparkles,
ChevronDown,
} from "lucide-react";
import { Button } from "@/components/ui/button";
import { Tabs, TabsList, TabsTrigger } from "@/components/ui/tabs";
import { cn } from "@/lib/utils";
// Placeholder extraction payload describing a single invoice.
const mockData = {
  // Document-level metadata.
  document: { type: "Invoice", confidence: 0.98 },

  // Vendor contact block.
  vendor: {
    name: "Acme Corporation",
    address: "123 Business Ave, Suite 400",
    city: "San Francisco",
    state: "CA",
    zip: "94102",
    phone: "+1 (555) 123-4567",
  },

  // Invoice identifiers and dates (ISO yyyy-mm-dd strings).
  invoice: {
    number: "INV-2024-0847",
    date: "2024-01-15",
    due_date: "2024-02-14",
    po_number: "PO-9823",
  },

  // Line items; each carries its own pre-computed total.
  items: [
    {
      description: "Professional Services",
      quantity: 40,
      unit_price: 150,
      total: 6000,
    },
    {
      description: "Software License",
      quantity: 5,
      unit_price: 299.99,
      total: 1499.95,
    },
    {
      description: "Support Package",
      quantity: 1,
      unit_price: 500,
      total: 500,
    },
  ],

  // Summary amounts.
  totals: {
    subtotal: 7999.95,
    tax_rate: 0.0875,
    tax_amount: 699.99,
    total: 8699.94,
  },
};
// Sample "XML" rendering of the mock invoice (vendor, invoice identifiers,
// first line item and totals).
// NOTE(review): the literal below holds only bare values and no element tags;
// the markup looks like it was stripped at some point — confirm against the
// intended fixture before using this string as real XML.
const mockXML = `
Acme Corporation
123 Business Ave, Suite 400
San Francisco
CA
94102
INV-2024-0847
2024-01-15
2024-02-14
-
Professional Services
40
6000.00
7999.95
699.99
8699.94
`;
// Sample plain-text rendering of the mock invoice. It repeats the vendor,
// invoice numbers, line items and totals found in mockData and additionally
// contains a "BILL TO" block and a payment-terms line that mockData lacks.
const mockText = `INVOICE
ACME CORPORATION
123 Business Ave, Suite 400
San Francisco, CA 94102
Phone: +1 (555) 123-4567
Invoice Number: INV-2024-0847
Invoice Date: January 15, 2024
Due Date: February 14, 2024
PO Number: PO-9823
BILL TO:
Customer Name
456 Client Street
New York, NY 10001
ITEMS:
─────────────────────────────────────────────────────────
Description Qty Unit Price Total
─────────────────────────────────────────────────────────
Professional Services 40 $150.00 $6,000.00
Software License 5 $299.99 $1,499.95
Support Package 1 $500.00 $500.00
─────────────────────────────────────────────────────────
Subtotal: $7,999.95
Tax (8.75%): $699.99
─────────────────────────
TOTAL: $8,699.94
Payment Terms: Net 30
Thank you for your business!`;
/**
 * Structural equality for JSON-like values (primitives, arrays, plain objects).
 *
 * Needed because values that went through JSON parsing are never the same
 * reference, so `===` alone cannot detect that a page-level object or array
 * merely repeats the top-level one.
 *
 * @param {*} a - First value.
 * @param {*} b - Second value.
 * @returns {boolean} True when both values have the same shape and contents.
 */
function isSameFieldValue(a, b) {
  if (a === b) {
    return true;
  }
  if (a === null || b === null || typeof a !== "object" || typeof b !== "object") {
    return false;
  }
  if (Array.isArray(a) !== Array.isArray(b)) {
    return false;
  }
  const aKeys = Object.keys(a);
  if (aKeys.length !== Object.keys(b).length) {
    return false;
  }
  return aKeys.every(
    (key) => Object.prototype.hasOwnProperty.call(b, key) && isSameFieldValue(a[key], b[key]),
  );
}

/**
 * Prepares extracted fields for JSON/XML/text output by removing duplicated
 * data and, for JSON and XML, flattening the `pages` array.
 *
 * - When `pages` is a non-empty array, the top-level `full_text` and every
 *   `page.fields.full_text` are dropped (they duplicate the page text).
 * - For `format` "json" or "xml", each page becomes a top-level `page_<n>`
 *   entry (`n` = `page.page_number`, or the 1-based index when that is falsy)
 *   holding `text`, `confidence`, `doc_type` and, only if any remain, the
 *   page fields that do not simply repeat a top-level field. The `pages`
 *   array itself is then removed.
 *
 * The input object is never mutated; a shallow copy is returned.
 *
 * @param {*} fields - Extracted fields; non-objects are returned unchanged.
 * @param {string} [format="json"] - Target format ("json", "xml", or other).
 * @returns {*} The prepared copy, or `fields` itself when it is not an object.
 */
function prepareFieldsForOutput(fields, format = "json") {
  if (!fields || typeof fields !== "object") {
    return fields;
  }

  // Shallow copy so deletions below never touch the caller's object.
  const output = { ...fields };
  if (!Array.isArray(output.pages)) {
    return output;
  }

  if (output.pages.length > 0) {
    // The page texts already carry this content.
    delete output.full_text;
    output.pages = output.pages.map((page) => {
      const cleanedPage = { ...page };
      if (cleanedPage.fields && typeof cleanedPage.fields === "object") {
        const cleanedFields = { ...cleanedPage.fields };
        delete cleanedFields.full_text;
        cleanedPage.fields = cleanedFields;
      }
      return cleanedPage;
    });
  }

  if (format !== "json" && format !== "xml") {
    return output;
  }

  // Keys present at the root before any page_<n> entries are added.
  const topLevelKeys = new Set(
    Object.keys(output).filter((key) => key !== "pages" && key !== "full_text"),
  );

  output.pages.forEach((page, idx) => {
    const pageNum = page.page_number || idx + 1;
    const pageFields = page.fields || {};

    // Keep only page fields that add information beyond the root-level value.
    const uniqueEntries = Object.entries(pageFields).filter(
      ([key, value]) =>
        key !== "full_text" &&
        !(topLevelKeys.has(key) && isSameFieldValue(value, output[key])),
    );

    const pageObj = {
      text: page.text || "",
      confidence: page.confidence || 0,
      doc_type: page.doc_type || "other",
    };
    if (uniqueEntries.length > 0) {
      pageObj.fields = Object.fromEntries(uniqueEntries);
    }
    output[`page_${pageNum}`] = pageObj;
  });

  // The page_<n> entries replace the array.
  delete output.pages;
  return output;
}
/**
 * Serializes extracted fields into an indented XML document string.
 *
 * The fields are first passed through `prepareFieldsForOutput(obj, "xml")`.
 * Each key becomes an element; arrays repeat the element once per item;
 * nested objects recurse with one extra space of indentation; scalar values
 * are escaped with `escapeXML`. `null`/`undefined` values and array items are
 * skipped.
 *
 * @param {object} obj - Extracted fields.
 * @param {string} [rootName="extraction"] - Name of the root element.
 * @returns {string} XML declaration, root element and children, joined by "\n".
 */
function objectToXML(obj, rootName = "extraction") {
  const preparedObj = prepareFieldsForOutput(obj, "xml");

  // Keys are arbitrary strings; map anything that is not a legal name
  // character to "_" so the result stays well-formed. Ordinary identifier-like
  // keys pass through unchanged.
  const toTagName = (key) => {
    const cleaned = String(key).replace(/[^\p{L}\p{N}_.-]/gu, "_");
    return /^[\p{L}_]/u.test(cleaned) ? cleaned : `_${cleaned}`;
  };

  const lines = [];
  const convert = (node, indent) => {
    for (const [key, value] of Object.entries(node)) {
      if (value === null || value === undefined) {
        continue;
      }
      // full_text duplicates the page texts whenever sibling pages exist.
      if (key === "full_text" && Array.isArray(node.pages) && node.pages.length > 0) {
        continue;
      }

      const tag = toTagName(key);
      if (Array.isArray(value)) {
        for (const item of value) {
          // Mirror the top-level rule: nothing is emitted for missing values.
          if (item === null || item === undefined) {
            continue;
          }
          lines.push(`${indent}<${tag}>`);
          if (typeof item === "object") {
            convert(item, `${indent} `);
          } else {
            lines.push(`${indent} ${escapeXML(String(item))}`);
          }
          lines.push(`${indent}</${tag}>`);
        }
      } else if (typeof value === "object") {
        lines.push(`${indent}<${tag}>`);
        convert(value, `${indent} `);
        lines.push(`${indent}</${tag}>`);
      } else {
        lines.push(`${indent}<${tag}>${escapeXML(String(value))}</${tag}>`);
      }
    }
  };

  // A missing or non-object payload yields an empty root instead of throwing.
  if (preparedObj && typeof preparedObj === "object") {
    convert(preparedObj, " ");
  }

  const rootTag = toTagName(rootName);
  return [
    `<?xml version="1.0" encoding="UTF-8"?>`,
    `<${rootTag}>`,
    ...lines,
    `</${rootTag}>`,
  ].join("\n");
}
// The five characters with predefined XML entities.
const XML_ENTITIES = Object.freeze({
  "&": "&amp;",
  "<": "&lt;",
  ">": "&gt;",
  '"': "&quot;",
  "'": "&apos;",
});

/**
 * Escapes the XML special characters in a value so it can be embedded as
 * element text or an attribute value.
 *
 * All characters are replaced in a single pass, so the "&" introduced by one
 * replacement is never escaped a second time.
 *
 * @param {*} str - Value to escape; coerced with `String()`.
 * @returns {string} The escaped text.
 */
function escapeXML(str) {
  return String(str).replace(/[&<>"']/g, (ch) => XML_ENTITIES[ch]);
}
/**
 * Formats one key/value pair as indented "key: value" lines.
 *
 * Arrays list scalar items as " - item" and object items as " Item <n>:"
 * followed by that item's own entries; nested objects recurse with one extra
 * space of indentation. `null` (as a value or as an array item) is printed
 * like any other scalar.
 *
 * @param {string} key - Field name.
 * @param {*} value - Field value.
 * @param {string} [indent=""] - Leading whitespace for this level.
 * @returns {string} One or more newline-terminated lines.
 */
function formatFieldEntry(key, value, indent = "") {
  const childIndent = `${indent} `;

  if (Array.isArray(value)) {
    let out = `${indent}${key}:\n`;
    value.forEach((item, idx) => {
      // typeof null is "object"; exclude it so Object.entries never sees null.
      if (typeof item === "object" && item !== null) {
        out += `${indent} Item ${idx + 1}:\n`;
        for (const [k, v] of Object.entries(item)) {
          out += formatFieldEntry(k, v, childIndent);
        }
      } else {
        out += `${indent} - ${item}\n`;
      }
    });
    return out;
  }

  if (typeof value === "object" && value !== null) {
    let out = `${indent}${key}:\n`;
    for (const [k, v] of Object.entries(value)) {
      out += formatFieldEntry(k, v, childIndent);
    }
    return out;
  }

  return `${indent}${key}: ${value}\n`;
}

/**
 * Renders extracted fields as human-readable plain text.
 *
 * When `fields.full_text` is truthy the output starts with that text. It is
 * followed by a per-page breakdown, unless the full text already contains
 * "=== PAGE" or "--- Page" markers, and then by every remaining field under
 * a "STRUCTURED FIELDS" heading. Otherwise all fields are formatted directly.
 *
 * @param {object} fields - Extracted fields.
 * @returns {string} Trimmed text, or "No data extracted." when there is
 *   nothing to show.
 */
function fieldsToText(fields) {
  if (!fields || typeof fields !== "object") {
    return "No data extracted.";
  }

  // Each top-level entry is followed by a blank line.
  const formatAll = (record) =>
    Object.entries(record)
      .map(([key, value]) => `${formatFieldEntry(key, value)}\n`)
      .join("");

  if (!fields.full_text) {
    return formatAll(fields).trim() || "No data extracted.";
  }

  // Coerce once so a non-string full_text cannot break the marker checks.
  const fullText = String(fields.full_text);
  let text = `=== FULL EXTRACTED TEXT ===\n\n${fullText}`;

  // Skip the per-page section when the full text already has page markers.
  const hasPageMarkers = fullText.includes("=== PAGE") || fullText.includes("--- Page");
  if (!hasPageMarkers && Array.isArray(fields.pages)) {
    text += "\n\n=== TEXT BY PAGE ===\n\n";
    fields.pages.forEach((page, idx) => {
      text += `--- Page ${page?.page_number || idx + 1} ---\n`;
      text += page?.text || "";
      text += "\n\n";
    });
  }

  const otherFields = { ...fields };
  delete otherFields.full_text;
  delete otherFields.pages;
  if (Object.keys(otherFields).length > 0) {
    text += "\n\n=== STRUCTURED FIELDS ===\n\n";
    text += formatAll(otherFields);
  }

  return text.trim();
}
/**
 * Panel that shows the result of a document extraction as Text, JSON or XML,
 * with a copy-to-clipboard action and a footer summarising the run.
 *
 * Props read by this component:
 * - hasFile: when falsy, a placeholder message is shown.
 * - isProcessing: when truthy (and a file exists), a progress message is shown.
 * - isComplete: gates the tab labels, the header count and the footer.
 * - extractionResult: optional object; `fields`, `confidence`,
 *   `fieldsExtracted` and `totalTime` are read from it, each with a fallback
 *   ({} or 0). `confidence` is compared against 90 and 70 and printed with a
 *   "%" suffix; `totalTime` is printed in seconds when >= 1000, else in ms.
 *
 * NOTE(review): the render code below (renderValue, renderSection and the
 * final return) contains only expression containers and text, with no JSX
 * element tags — e.g. renderValue returns the object literal `{value}`. The
 * markup looks like it was stripped from this copy of the file; restore it
 * from the original source before relying on this component.
 */
export default function ExtractionOutput({ hasFile, isProcessing, isComplete, extractionResult }) {
// Currently selected output format: "json" (default), "xml", or anything else for text.
const [activeTab, setActiveTab] = useState("json");
// True for 2 seconds after a copy, see handleCopy.
const [copied, setCopied] = useState(false);
// Get fields from extraction result, default to empty object
const fields = extractionResult?.fields || {};
const confidence = extractionResult?.confidence || 0;
const fieldsExtracted = extractionResult?.fieldsExtracted || 0;
const totalTime = extractionResult?.totalTime || 0;
// Initialize expanded sections based on available fields
const [expandedSections, setExpandedSections] = useState(() =>
Object.keys(fields).slice(0, 5) // Expand first 5 sections by default
);
// Serializes the fields in the format of the active tab and writes the result
// to the clipboard, then flags `copied` for 2000 ms.
// NOTE(review): the result of writeText is not checked, so `copied` is set
// whether or not the write succeeds, and the timer is never cleared.
const handleCopy = () => {
let content = "";
if (activeTab === "json") {
const preparedFields = prepareFieldsForOutput(fields, "json");
content = JSON.stringify(preparedFields, null, 2);
} else if (activeTab === "xml") {
content = objectToXML(fields);
} else {
content = fieldsToText(fields);
}
navigator.clipboard.writeText(content);
setCopied(true);
setTimeout(() => setCopied(false), 2000);
};
// Get prepared fields for display
// NOTE(review): when extractionResult has no `fields`, `fields` above is a new
// {} on every render, so this memo's dependency changes on every render.
const preparedFields = React.useMemo(() => {
return prepareFieldsForOutput(fields, "json");
}, [fields]);
// Update expanded sections when fields change
// (useEffect is also imported by name at the top of the file; here it is
// reached through the React namespace.)
React.useEffect(() => {
if (extractionResult?.fields) {
setExpandedSections(Object.keys(extractionResult.fields).slice(0, 5));
}
}, [extractionResult]);
// Adds the section key to the expanded list, or removes it if already present.
const toggleSection = (section) => {
setExpandedSections((prev) =>
prev.includes(section) ? prev.filter((s) => s !== section) : [...prev, section]
);
};
// Renders a scalar value; numbers and strings are handled separately from
// everything else, which is stringified.
const renderValue = (value) => {
if (typeof value === "number") {
return {value};
}
if (typeof value === "string") {
return "{value}";
}
return String(value);
};
// Renders one key/value entry, recursing into objects and arrays.
// Expansion state is looked up by the bare key, so nested entries that share
// a key name also share one expanded/collapsed flag.
const renderSection = (key, value, level = 0) => {
const isExpanded = expandedSections.includes(key);
const isObject = typeof value === "object" && value !== null;
const isArray = Array.isArray(value);
if (!isObject) {
return (
"{key}"
:
{renderValue(value)}
);
}
return (
{isExpanded && (
{isArray ? (
value.map((item, idx) => (
{Object.entries(item).map(([k, v]) => renderSection(k, v, level + 2))}
{idx < value.length - 1 &&
}
))
) : (
Object.entries(value).map(([k, v]) => renderSection(k, v, level + 1))
)}
{isArray ? "]" : "}"}
)}
);
};
return (
{/* Header */}
Extracted Data
{isComplete
? `${fieldsExtracted} field${fieldsExtracted !== 1 ? 's' : ''} extracted`
: "Waiting for extraction"}
{isComplete && (
Text
JSON
XML
)}
{/* Output Area */}
{!hasFile ? (
Extracted data will appear here
) : isProcessing ? (
Extracting data...
Analyzing document structure
{[0, 1, 2].map((i) => (
))}
) : isComplete && Object.keys(fields).length === 0 ? (
No data extracted
The document may not contain extractable fields
) : (
{activeTab === "text" ? (
{fieldsToText(fields)}
) : activeTab === "json" ? (
{"{"}
{Object.keys(preparedFields).length > 0 ? (
Object.entries(preparedFields).map(([key, value]) =>
renderSection(key, value, 1)
)
) : (
No fields extracted
)}
{"}"}
) : (
{objectToXML(fields).split("\n").map((line, i) => (
{line.includes("<") ? (
<>
{line.split(/(<\/?[\w\s=".-]+>)/g).map((part, j) => {
if (part.startsWith("")) {
return (
{part}
);
}
if (part.startsWith("<")) {
return (
{part}
);
}
return (
{part}
);
})}
>
) : (
line
)}
))}
)}
)}
{/* Confidence Footer */}
{isComplete && extractionResult && (
= 90 ? "bg-emerald-500" : confidence >= 70 ? "bg-amber-500" : "bg-red-500"
)} />
Confidence:
{confidence > 0 ? `${confidence.toFixed(1)}%` : "N/A"}
Fields:
{fieldsExtracted}
Processed in {totalTime >= 1000 ? `${(totalTime / 1000).toFixed(1)}s` : `${totalTime}ms`}
)}
);
}