// Source: AIEXTRACT1/frontend/src/components/ocr/ExtractionOutput.jsx
// Last change: "Update frontend/src/components/ocr/ExtractionOutput.jsx" by Seth0330
// Commit: d0cfc3b (verified)
import React, { useState, useEffect } from "react";
import { motion, AnimatePresence } from "framer-motion";
import {
Code2,
Copy,
Check,
Braces,
FileCode2,
FileText,
Sparkles,
ChevronDown,
} from "lucide-react";
import { Button } from "@/components/ui/button";
import { Tabs, TabsList, TabsTrigger } from "@/components/ui/tabs";
import { cn } from "@/lib/utils";
// Mock extracted data
// Sample invoice extraction payload with document, vendor, invoice, items and
// totals sections.
// NOTE(review): nothing in the visible part of this file reads mockData —
// presumably a design-time fixture; confirm before relying on or removing it.
// NOTE(review): document.confidence here is a 0-1 fraction, while the component
// footer below compares confidence against 90/70 and prints it with "%".
const mockData = {
document: {
type: "Invoice",
confidence: 0.98,
},
vendor: {
name: "Acme Corporation",
address: "123 Business Ave, Suite 400",
city: "San Francisco",
state: "CA",
zip: "94102",
phone: "+1 (555) 123-4567",
},
invoice: {
number: "INV-2024-0847",
date: "2024-01-15",
due_date: "2024-02-14",
po_number: "PO-9823",
},
items: [
{ description: "Professional Services", quantity: 40, unit_price: 150.0, total: 6000.0 },
{ description: "Software License", quantity: 5, unit_price: 299.99, total: 1499.95 },
{ description: "Support Package", quantity: 1, unit_price: 500.0, total: 500.0 },
],
totals: {
subtotal: 7999.95,
tax_rate: 0.0875,
tax_amount: 699.99,
total: 8699.94,
},
};
// Hand-written XML rendering of the sample invoice above. It is an abbreviated
// view: only the first line item is present and the vendor phone, PO number and
// unit prices are omitted.
// NOTE(review): not referenced by any code visible in this file.
const mockXML = `<?xml version="1.0" encoding="UTF-8"?>
<extraction>
<document type="Invoice" confidence="0.98"/>
<vendor>
<name>Acme Corporation</name>
<address>123 Business Ave, Suite 400</address>
<city>San Francisco</city>
<state>CA</state>
<zip>94102</zip>
</vendor>
<invoice>
<number>INV-2024-0847</number>
<date>2024-01-15</date>
<due_date>2024-02-14</due_date>
</invoice>
<items>
<item>
<description>Professional Services</description>
<quantity>40</quantity>
<total>6000.00</total>
</item>
</items>
<totals>
<subtotal>7999.95</subtotal>
<tax>699.99</tax>
<total>8699.94</total>
</totals>
</extraction>`;
// Plain-text rendering of the same sample invoice (header, bill-to block,
// item table, totals and payment terms).
// NOTE(review): not referenced by any code visible in this file.
const mockText = `INVOICE
ACME CORPORATION
123 Business Ave, Suite 400
San Francisco, CA 94102
Phone: +1 (555) 123-4567
Invoice Number: INV-2024-0847
Invoice Date: January 15, 2024
Due Date: February 14, 2024
PO Number: PO-9823
BILL TO:
Customer Name
456 Client Street
New York, NY 10001
ITEMS:
─────────────────────────────────────────────────────────
Description Qty Unit Price Total
─────────────────────────────────────────────────────────
Professional Services 40 $150.00 $6,000.00
Software License 5 $299.99 $1,499.95
Support Package 1 $500.00 $500.00
─────────────────────────────────────────────────────────
Subtotal: $7,999.95
Tax (8.75%): $699.99
─────────────────────────
TOTAL: $8,699.94
Payment Terms: Net 30
Thank you for your business!`;
// Structural equality for JSON-like values (primitives, arrays, plain objects).
// Used to decide whether a page-level field merely repeats the root-level value.
function isSameFieldValue(a, b) {
  if (a === b) {
    return true;
  }
  if (a === null || b === null || typeof a !== "object" || typeof b !== "object") {
    return false;
  }
  if (Array.isArray(a) !== Array.isArray(b)) {
    return false;
  }
  const keysA = Object.keys(a);
  if (keysA.length !== Object.keys(b).length) {
    return false;
  }
  return keysA.every(
    (key) => Object.prototype.hasOwnProperty.call(b, key) && isSameFieldValue(a[key], b[key])
  );
}

/**
 * Prepare extracted fields for JSON/XML output: remove duplicated text and
 * restructure the `pages` array.
 *
 * - When `pages` is a non-empty array, the root `full_text` is dropped and
 *   `full_text` is removed from every `page.fields`.
 * - For the "json" and "xml" formats, each page becomes a root-level
 *   `page_<n>` entry (`n` is `page.page_number`, or the 1-based index) holding
 *   `text`, `confidence`, `doc_type` and, only if any remain, the page fields
 *   that do not simply repeat a root-level field. The `pages` array itself is
 *   then removed.
 *
 * The input object is never mutated; a shallow copy is returned.
 *
 * @param {object} fields - Extraction result fields. Non-object values are returned unchanged.
 * @param {string} [format="json"] - Output format; only "json" and "xml" trigger page restructuring.
 * @returns {object} The prepared copy (or `fields` itself when it is not an object).
 */
function prepareFieldsForOutput(fields, format = "json") {
  if (!fields || typeof fields !== "object") {
    return fields;
  }

  const output = { ...fields };
  const hasPagesArray = Array.isArray(output.pages);

  // Remove full_text from top-level if pages exist (page.text already carries it).
  if (hasPagesArray && output.pages.length > 0) {
    delete output.full_text;
    output.pages = output.pages.map((page) => {
      const cleanedPage = { ...page };
      if (cleanedPage.fields && typeof cleanedPage.fields === "object") {
        const cleanedFields = { ...cleanedPage.fields };
        delete cleanedFields.full_text;
        cleanedPage.fields = cleanedFields;
      }
      return cleanedPage;
    });
  }

  if ((format === "json" || format === "xml") && hasPagesArray) {
    // Snapshot the root keys before any page_<n> entries are added below.
    const topLevelKeys = new Set(
      Object.keys(output).filter((k) => k !== "pages" && k !== "full_text")
    );

    output.pages.forEach((page, idx) => {
      const pageNum = page.page_number || idx + 1;
      const pageFields = page.fields || {};

      const cleanedPageFields = {};
      for (const [key, value] of Object.entries(pageFields)) {
        if (key === "full_text") {
          continue;
        }
        // Compare by content: object/array values are distinct instances even
        // when they repeat the root value, so a reference check never matches.
        const repeatsRoot = topLevelKeys.has(key) && isSameFieldValue(value, output[key]);
        if (!repeatsRoot) {
          cleanedPageFields[key] = value;
        }
      }

      const pageObj = {
        text: page.text || "",
        confidence: page.confidence || 0,
        doc_type: page.doc_type || "other",
      };
      // Only add fields if there are unique page-specific fields.
      if (Object.keys(cleanedPageFields).length > 0) {
        pageObj.fields = cleanedPageFields;
      }
      output[`page_${pageNum}`] = pageObj;
    });

    // The pages array is now represented by the page_<n> entries.
    delete output.pages;
  }

  return output;
}
/**
 * Serialize extracted fields to an XML document string.
 *
 * The input is first passed through prepareFieldsForOutput(obj, "xml"). Each
 * key then becomes an element: scalars are written inline, nested objects
 * recurse, and arrays emit one element per item under the array's key.
 * `null`/`undefined` values, including array items, are skipped.
 *
 * @param {object} obj - Fields to serialize.
 * @param {string} [rootName="extraction"] - Name of the root element (used as given).
 * @returns {string} The XML text, starting with the XML declaration.
 */
function objectToXML(obj, rootName = "extraction") {
  const preparedObj = prepareFieldsForOutput(obj, "xml");
  let xml = `<?xml version="1.0" encoding="UTF-8"?>\n<${rootName}>\n`;

  // Keys come from extracted data and may contain spaces, "#", a leading digit,
  // etc. Coerce them into well-formed element names; valid keys pass through unchanged.
  const toTagName = (key) => {
    const cleaned = String(key).replace(/[^\p{L}\p{N}_.-]/gu, "_");
    return /^[\p{L}_]/u.test(cleaned) ? cleaned : `_${cleaned}`;
  };

  const convert = (node, indent = " ") => {
    for (const [key, value] of Object.entries(node)) {
      if (value === null || value === undefined) continue;
      // Skip full_text if pages exist (normally already handled in prepareFieldsForOutput).
      if (key === "full_text" && Array.isArray(node.pages) && node.pages.length > 0) {
        continue;
      }
      const tag = toTagName(key);
      if (Array.isArray(value)) {
        for (const item of value) {
          // typeof null === "object", so null must be excluded before recursing.
          if (item === null || item === undefined) continue;
          xml += `${indent}<${tag}>\n`;
          if (typeof item === "object") {
            convert(item, indent + " ");
          } else {
            xml += `${indent} ${escapeXML(String(item))}\n`;
          }
          xml += `${indent}</${tag}>\n`;
        }
      } else if (typeof value === "object") {
        xml += `${indent}<${tag}>\n`;
        convert(value, indent + " ");
        xml += `${indent}</${tag}>\n`;
      } else {
        xml += `${indent}<${tag}>${escapeXML(String(value))}</${tag}>\n`;
      }
    }
  };

  // A non-object input yields an empty root element instead of throwing.
  if (preparedObj !== null && typeof preparedObj === "object") {
    convert(preparedObj);
  }
  xml += `</${rootName}>`;
  return xml;
}
/**
 * Escape the five XML-reserved characters in a string.
 *
 * @param {string} str - Text to escape; must be a string (callers coerce with String()).
 * @returns {string} The text with & < > " ' replaced by their entity references.
 */
function escapeXML(str) {
  const entityByChar = {
    "&": "&amp;",
    "<": "&lt;",
    ">": "&gt;",
    '"': "&quot;",
    "'": "&apos;",
  };
  // One pass over the input: each reserved character is looked up in the table.
  return str.replace(/[&<>"']/g, (ch) => entityByChar[ch]);
}
// Formats one key/value pair (recursively) as indented "key: value" lines and
// returns the resulting text. Arrays list their items; object items are
// introduced by an "Item N:" line.
function formatFieldEntry(key, value, indent = "") {
  const formatChildren = (obj) =>
    Object.entries(obj)
      .map(([k, v]) => formatFieldEntry(k, v, indent + " "))
      .join("");

  if (Array.isArray(value)) {
    const items = value.map((item, idx) => {
      // typeof null === "object": treat null as a scalar so it prints as "- null".
      if (item !== null && typeof item === "object") {
        return `${indent} Item ${idx + 1}:\n${formatChildren(item)}`;
      }
      return `${indent} - ${item}\n`;
    });
    return `${indent}${key}:\n${items.join("")}`;
  }
  if (value !== null && typeof value === "object") {
    return `${indent}${key}:\n${formatChildren(value)}`;
  }
  return `${indent}${key}: ${value}\n`;
}

/**
 * Format extracted fields as human-readable plain text.
 *
 * When `fields.full_text` is present it is shown first. A per-page breakdown
 * from `fields.pages` follows unless the full text already contains page
 * markers ("=== PAGE" or "--- Page"). All remaining fields are then listed
 * under a "STRUCTURED FIELDS" heading. Without `full_text`, every field is
 * listed directly.
 *
 * @param {object} fields - Extracted fields.
 * @returns {string} The formatted text, or "No data extracted." when there is nothing to show.
 */
function fieldsToText(fields) {
  if (!fields || typeof fields !== "object") {
    return "No data extracted.";
  }

  // Each top-level entry is followed by a blank line.
  const formatAll = (obj) =>
    Object.entries(obj)
      .map(([key, value]) => `${formatFieldEntry(key, value)}\n`)
      .join("");

  // Fallback: no full text, so format all fields normally.
  if (!fields.full_text) {
    return formatAll(fields).trim() || "No data extracted.";
  }

  // Coerce so a non-string full_text cannot break the marker checks below.
  const fullText = String(fields.full_text);
  let text = `=== FULL EXTRACTED TEXT ===\n\n${fullText}`;

  // Show the pages array only if full_text has no page breakdown of its own.
  const hasPageMarkers = fullText.includes("=== PAGE") || fullText.includes("--- Page");
  if (!hasPageMarkers && Array.isArray(fields.pages)) {
    text += "\n\n=== TEXT BY PAGE ===\n\n";
    fields.pages.forEach((page, idx) => {
      text += `--- Page ${page?.page_number || idx + 1} ---\n`;
      text += page?.text || "";
      text += "\n\n";
    });
  }

  // Then show the other structured fields.
  const otherFields = { ...fields };
  delete otherFields.full_text;
  delete otherFields.pages;
  if (Object.keys(otherFields).length > 0) {
    text += "\n\n=== STRUCTURED FIELDS ===\n\n";
    text += formatAll(otherFields);
  }

  return text.trim();
}
export default function ExtractionOutput({ hasFile, isProcessing, isComplete, extractionResult }) {
const [activeTab, setActiveTab] = useState("json");
const [copied, setCopied] = useState(false);
// Get fields from extraction result, default to empty object
const fields = extractionResult?.fields || {};
const confidence = extractionResult?.confidence || 0;
const fieldsExtracted = extractionResult?.fieldsExtracted || 0;
const totalTime = extractionResult?.totalTime || 0;
// Initialize expanded sections based on available fields
const [expandedSections, setExpandedSections] = useState(() =>
Object.keys(fields).slice(0, 5) // Expand first 5 sections by default
);
const handleCopy = () => {
let content = "";
if (activeTab === "json") {
const preparedFields = prepareFieldsForOutput(fields, "json");
content = JSON.stringify(preparedFields, null, 2);
} else if (activeTab === "xml") {
content = objectToXML(fields);
} else {
content = fieldsToText(fields);
}
navigator.clipboard.writeText(content);
setCopied(true);
setTimeout(() => setCopied(false), 2000);
};
// Get prepared fields for display
const preparedFields = React.useMemo(() => {
return prepareFieldsForOutput(fields, "json");
}, [fields]);
// Update expanded sections when fields change
React.useEffect(() => {
if (extractionResult?.fields) {
setExpandedSections(Object.keys(extractionResult.fields).slice(0, 5));
}
}, [extractionResult]);
const toggleSection = (section) => {
setExpandedSections((prev) =>
prev.includes(section) ? prev.filter((s) => s !== section) : [...prev, section]
);
};
const renderValue = (value) => {
if (typeof value === "number") {
return <span className="text-amber-600">{value}</span>;
}
if (typeof value === "string") {
return <span className="text-emerald-600">"{value}"</span>;
}
return String(value);
};
const renderSection = (key, value, level = 0) => {
const isExpanded = expandedSections.includes(key);
const isObject = typeof value === "object" && value !== null;
const isArray = Array.isArray(value);
if (!isObject) {
return (
<div
key={key}
className="flex items-start gap-2 py-1"
style={{ paddingLeft: level * 16 }}
>
<span className="text-violet-500">"{key}"</span>
<span className="text-slate-400">:</span>
{renderValue(value)}
</div>
);
}
return (
<div key={key}>
<button
onClick={() => toggleSection(key)}
className="flex items-center gap-2 py-1 hover:bg-slate-50 w-full text-left rounded"
style={{ paddingLeft: level * 16 }}
>
<ChevronDown
className={cn(
"h-3 w-3 text-slate-400 transition-transform",
!isExpanded && "-rotate-90"
)}
/>
<span className="text-violet-500">"{key}"</span>
<span className="text-slate-400">:</span>
<span className="text-slate-400">{isArray ? "[" : "{"}</span>
{!isExpanded && (
<span className="text-slate-300 text-xs">
{isArray ? `${value.length} items` : `${Object.keys(value).length} fields`}
</span>
)}
</button>
<AnimatePresence>
{isExpanded && (
<motion.div
initial={{ height: 0, opacity: 0 }}
animate={{ height: "auto", opacity: 1 }}
exit={{ height: 0, opacity: 0 }}
transition={{ duration: 0.2 }}
className="overflow-hidden"
>
{isArray ? (
value.map((item, idx) => (
<div key={idx} className="border-l border-slate-100 ml-4">
{Object.entries(item).map(([k, v]) => renderSection(k, v, level + 2))}
{idx < value.length - 1 && <div className="h-2" />}
</div>
))
) : (
Object.entries(value).map(([k, v]) => renderSection(k, v, level + 1))
)}
<div style={{ paddingLeft: level * 16 }} className="text-slate-400">
{isArray ? "]" : "}"}
</div>
</motion.div>
)}
</AnimatePresence>
</div>
);
};
return (
<div className="h-full flex flex-col bg-white rounded-2xl border border-slate-200 overflow-hidden">
{/* Header */}
<div className="flex items-center justify-between px-5 py-4 border-b border-slate-100">
<div className="flex items-center gap-3">
<div className="h-8 w-8 rounded-lg bg-emerald-50 flex items-center justify-center">
<Code2 className="h-4 w-4 text-emerald-600" />
</div>
<div>
<h3 className="font-semibold text-slate-800 text-sm">Extracted Data</h3>
<p className="text-xs text-slate-400">
{isComplete
? `${fieldsExtracted} field${fieldsExtracted !== 1 ? 's' : ''} extracted`
: "Waiting for extraction"}
</p>
</div>
</div>
{isComplete && (
<div className="flex items-center gap-2">
<Tabs value={activeTab} onValueChange={setActiveTab}>
<TabsList className="h-8 bg-slate-100 p-0.5">
<TabsTrigger value="text" className="h-7 text-xs gap-1.5">
<FileText className="h-3 w-3" />
Text
</TabsTrigger>
<TabsTrigger value="json" className="h-7 text-xs gap-1.5">
<Braces className="h-3 w-3" />
JSON
</TabsTrigger>
<TabsTrigger value="xml" className="h-7 text-xs gap-1.5">
<FileCode2 className="h-3 w-3" />
XML
</TabsTrigger>
</TabsList>
</Tabs>
<Button
variant="ghost"
size="sm"
onClick={handleCopy}
className="h-8 text-xs gap-1.5"
>
{copied ? (
<>
<Check className="h-3 w-3 text-emerald-500" />
Copied
</>
) : (
<>
<Copy className="h-3 w-3" />
Copy
</>
)}
</Button>
</div>
)}
</div>
{/* Output Area */}
<div className="flex-1 overflow-auto">
{!hasFile ? (
<div className="h-full flex items-center justify-center p-6">
<div className="text-center">
<div className="h-20 w-20 mx-auto rounded-2xl bg-slate-100 flex items-center justify-center mb-4">
<Code2 className="h-10 w-10 text-slate-300" />
</div>
<p className="text-slate-400 text-sm">Extracted data will appear here</p>
</div>
</div>
) : isProcessing ? (
<div className="h-full flex items-center justify-center p-6">
<div className="text-center">
<motion.div
animate={{ rotate: 360 }}
transition={{ duration: 2, repeat: Infinity, ease: "linear" }}
className="h-16 w-16 mx-auto rounded-2xl bg-gradient-to-br from-indigo-100 to-violet-100 flex items-center justify-center mb-4"
>
<Sparkles className="h-8 w-8 text-indigo-500" />
</motion.div>
<p className="text-slate-700 font-medium mb-1">Extracting data...</p>
<p className="text-slate-400 text-sm">Analyzing document structure</p>
<div className="mt-6 flex items-center justify-center gap-1">
{[0, 1, 2].map((i) => (
<motion.div
key={i}
animate={{ scale: [1, 1.2, 1] }}
transition={{
duration: 0.6,
repeat: Infinity,
delay: i * 0.2,
}}
className="h-2 w-2 rounded-full bg-indigo-400"
/>
))}
</div>
</div>
</div>
) : isComplete && Object.keys(fields).length === 0 ? (
<div className="h-full flex items-center justify-center p-6">
<div className="text-center">
<div className="h-20 w-20 mx-auto rounded-2xl bg-amber-100 flex items-center justify-center mb-4">
<Code2 className="h-10 w-10 text-amber-600" />
</div>
<p className="text-slate-600 font-medium mb-1">No data extracted</p>
<p className="text-slate-400 text-sm">The document may not contain extractable fields</p>
</div>
</div>
) : (
<div className="p-4 font-mono text-sm">
{activeTab === "text" ? (
<pre className="text-sm text-slate-700 whitespace-pre-wrap leading-relaxed">
{fieldsToText(fields)}
</pre>
) : activeTab === "json" ? (
<div className="space-y-1">
<span className="text-slate-400">{"{"}</span>
{Object.keys(preparedFields).length > 0 ? (
Object.entries(preparedFields).map(([key, value]) =>
renderSection(key, value, 1)
)
) : (
<div className="pl-4 text-slate-400 italic">No fields extracted</div>
)}
<span className="text-slate-400">{"}"}</span>
</div>
) : (
<pre className="text-sm text-slate-600 whitespace-pre-wrap">
{objectToXML(fields).split("\n").map((line, i) => (
<div key={i} className="hover:bg-slate-50 px-2 -mx-2 rounded">
{line.includes("<") ? (
<>
{line.split(/(<\/?[\w\s=".-]+>)/g).map((part, j) => {
if (part.startsWith("</")) {
return (
<span key={j} className="text-rose-500">
{part}
</span>
);
}
if (part.startsWith("<")) {
return (
<span key={j} className="text-indigo-500">
{part}
</span>
);
}
return (
<span key={j} className="text-slate-700">
{part}
</span>
);
})}
</>
) : (
line
)}
</div>
))}
</pre>
)}
</div>
)}
</div>
{/* Confidence Footer */}
{isComplete && extractionResult && (
<div className="px-5 py-3 border-t border-slate-100 bg-slate-50/50">
<div className="flex items-center justify-between text-xs">
<div className="flex items-center gap-4">
<div className="flex items-center gap-1.5">
<div className={cn(
"h-2 w-2 rounded-full",
confidence >= 90 ? "bg-emerald-500" : confidence >= 70 ? "bg-amber-500" : "bg-red-500"
)} />
<span className="text-slate-500">Confidence:</span>
<span className="font-semibold text-slate-700">
{confidence > 0 ? `${confidence.toFixed(1)}%` : "N/A"}
</span>
</div>
<div className="flex items-center gap-1.5">
<span className="text-slate-500">Fields:</span>
<span className="font-semibold text-slate-700">{fieldsExtracted}</span>
</div>
</div>
<span className="text-slate-400">
Processed in {totalTime >= 1000 ? `${(totalTime / 1000).toFixed(1)}s` : `${totalTime}ms`}
</span>
</div>
</div>
)}
</div>
);
}