// Page-capture residue (not part of the module): "Spaces: Running"
| import React, { useState, useEffect } from "react"; | |
| import { motion, AnimatePresence } from "framer-motion"; | |
| import { | |
| Code2, | |
| Copy, | |
| Check, | |
| Braces, | |
| FileCode2, | |
| FileText, | |
| Sparkles, | |
| ChevronDown, | |
| } from "lucide-react"; | |
| import { Button } from "@/components/ui/button"; | |
| import { Tabs, TabsList, TabsTrigger } from "@/components/ui/tabs"; | |
| import { cn } from "@/lib/utils"; | |
/**
 * Builds one invoice line item for the mock payload.
 * The total is passed explicitly (not computed) so the fixture keeps the
 * exact figures shown on the sample document.
 */
const mockLineItem = (description, quantity, unit_price, total) => ({
  description,
  quantity,
  unit_price,
  total,
});

/**
 * Sample extraction result for an invoice, grouped the same way the viewer
 * renders sections: document metadata, vendor, invoice header, line items
 * and totals.
 */
const mockData = {
  document: { type: "Invoice", confidence: 0.98 },
  vendor: {
    name: "Acme Corporation",
    address: "123 Business Ave, Suite 400",
    city: "San Francisco",
    state: "CA",
    zip: "94102",
    phone: "+1 (555) 123-4567",
  },
  invoice: {
    number: "INV-2024-0847",
    date: "2024-01-15",
    due_date: "2024-02-14",
    po_number: "PO-9823",
  },
  items: [
    mockLineItem("Professional Services", 40, 150.0, 6000.0),
    mockLineItem("Software License", 5, 299.99, 1499.95),
    mockLineItem("Support Package", 1, 500.0, 500.0),
  ],
  totals: {
    subtotal: 7999.95,
    tax_rate: 0.0875,
    tax_amount: 699.99,
    total: 8699.94,
  },
};
/**
 * Sample XML rendering of the same invoice described by `mockData`.
 *
 * Kept in sync with the JSON fixture: all three line items (with unit
 * prices), the vendor phone number and the PO number are present, so the
 * listed items add up to the subtotal shown in <totals>.
 */
const mockXML = `<?xml version="1.0" encoding="UTF-8"?>
<extraction>
  <document type="Invoice" confidence="0.98"/>
  <vendor>
    <name>Acme Corporation</name>
    <address>123 Business Ave, Suite 400</address>
    <city>San Francisco</city>
    <state>CA</state>
    <zip>94102</zip>
    <phone>+1 (555) 123-4567</phone>
  </vendor>
  <invoice>
    <number>INV-2024-0847</number>
    <date>2024-01-15</date>
    <due_date>2024-02-14</due_date>
    <po_number>PO-9823</po_number>
  </invoice>
  <items>
    <item>
      <description>Professional Services</description>
      <quantity>40</quantity>
      <unit_price>150.00</unit_price>
      <total>6000.00</total>
    </item>
    <item>
      <description>Software License</description>
      <quantity>5</quantity>
      <unit_price>299.99</unit_price>
      <total>1499.95</total>
    </item>
    <item>
      <description>Support Package</description>
      <quantity>1</quantity>
      <unit_price>500.00</unit_price>
      <total>500.00</total>
    </item>
  </items>
  <totals>
    <subtotal>7999.95</subtotal>
    <tax rate="0.0875">699.99</tax>
    <total>8699.94</total>
  </totals>
</extraction>`;
/**
 * Sample plain-text rendering of the mock invoice.
 *
 * The horizontal rules use box-drawing characters; they had been stored as
 * mis-decoded bytes (runs of Greek beta), which is repaired here.
 */
const mockText = `INVOICE
ACME CORPORATION
123 Business Ave, Suite 400
San Francisco, CA 94102
Phone: +1 (555) 123-4567
Invoice Number: INV-2024-0847
Invoice Date: January 15, 2024
Due Date: February 14, 2024
PO Number: PO-9823
BILL TO:
Customer Name
456 Client Street
New York, NY 10001
ITEMS:
─────────────────────────────────────────────────────────
Description Qty Unit Price Total
─────────────────────────────────────────────────────────
Professional Services 40 $150.00 $6,000.00
Software License 5 $299.99 $1,499.95
Support Package 1 $500.00 $500.00
─────────────────────────────────────────────────────────
Subtotal: $7,999.95
Tax (8.75%): $699.99
─────────────────────────
TOTAL: $8,699.94
Payment Terms: Net 30
Thank you for your business!`;
/**
 * Structural equality for JSON-like values (primitives, arrays, plain objects).
 * Used to decide whether a page-level field merely repeats a root-level field.
 * Object key order is ignored; arrays are compared position by position.
 */
function isSameFieldValue(a, b) {
  if (a === b) {
    return true;
  }
  if (a === null || b === null || typeof a !== "object" || typeof b !== "object") {
    return false;
  }
  if (Array.isArray(a) !== Array.isArray(b)) {
    return false;
  }
  const aKeys = Object.keys(a);
  if (aKeys.length !== Object.keys(b).length) {
    return false;
  }
  return aKeys.every(
    (k) => Object.prototype.hasOwnProperty.call(b, k) && isSameFieldValue(a[k], b[k])
  );
}

/**
 * Prepares extracted fields for display/export without mutating the input.
 *
 * - When a non-empty `pages` array exists, the top-level `full_text` and every
 *   `page.fields.full_text` are dropped (they duplicate the per-page `text`).
 * - For the "json" and "xml" formats, `pages` is replaced by one top-level
 *   entry per page (`page_1`, `page_2`, …) holding `text`, `confidence`,
 *   `doc_type` and, only when something page-specific remains, `fields`.
 *   Page fields whose value equals the same-named root field are omitted;
 *   equality is structural, so repeated objects/arrays are removed as well.
 *
 * @param {*} fields - Extracted fields; non-objects are returned unchanged.
 * @param {string} [format="json"] - "json" and "xml" trigger the page_N
 *   restructuring; any other value only removes the duplicated full_text.
 * @returns {*} A shallow-copied, cleaned object (or `fields` itself when it
 *   is not an object).
 */
function prepareFieldsForOutput(fields, format = "json") {
  if (!fields || typeof fields !== "object") {
    return fields;
  }

  const output = { ...fields };

  if (Array.isArray(output.pages) && output.pages.length > 0) {
    // The per-page text already carries everything full_text would show.
    delete output.full_text;
    output.pages = output.pages.map((page) => {
      const cleanedPage = { ...page };
      if (cleanedPage.fields && typeof cleanedPage.fields === "object") {
        const cleanedFields = { ...cleanedPage.fields };
        delete cleanedFields.full_text;
        cleanedPage.fields = cleanedFields;
      }
      return cleanedPage;
    });
  }

  const restructurePages = format === "json" || format === "xml";
  if (restructurePages && Array.isArray(output.pages)) {
    // Snapshot of the root-level fields, taken before any page_N key is added,
    // so every page is compared against the same baseline.
    const rootFields = { ...output };
    delete rootFields.pages;
    delete rootFields.full_text;

    output.pages.forEach((page, idx) => {
      const pageNum = page.page_number || idx + 1;
      const pageFields = page.fields || {};

      const uniquePageFields = {};
      for (const [key, value] of Object.entries(pageFields)) {
        if (key === "full_text") {
          continue;
        }
        const repeatsRoot =
          Object.prototype.hasOwnProperty.call(rootFields, key) &&
          isSameFieldValue(value, rootFields[key]);
        if (!repeatsRoot) {
          uniquePageFields[key] = value;
        }
      }

      const pageObj = {
        text: page.text || "",
        confidence: page.confidence || 0,
        doc_type: page.doc_type || "other",
      };
      // Only attach `fields` when the page has something the root lacks.
      if (Object.keys(uniquePageFields).length > 0) {
        pageObj.fields = uniquePageFields;
      }
      output[`page_${pageNum}`] = pageObj;
    });

    // The page_N entries replace the array.
    delete output.pages;
  }

  return output;
}
/**
 * Turns an arbitrary field key into a usable XML element name.
 * Characters outside [A-Za-z0-9_.-] become "_", and a leading "_" is added
 * when the result would not start with a letter or underscore. Keys that are
 * already plain identifiers (e.g. "page_1") pass through unchanged.
 */
function toXmlTagName(name) {
  const cleaned = String(name).replace(/[^A-Za-z0-9_.-]/g, "_");
  return /^[A-Za-z_]/.test(cleaned) ? cleaned : `_${cleaned}`;
}

/**
 * Serializes extracted fields as an XML document.
 *
 * The input is first passed through `prepareFieldsForOutput(obj, "xml")`.
 * Nested objects become nested elements, each array item is emitted as its
 * own element named after the array's key, and scalar values are escaped
 * with `escapeXML`. `null`/`undefined` values (including array items) are
 * skipped. A `null` or non-object input yields an empty root element.
 *
 * @param {*} obj - Extracted fields.
 * @param {string} [rootName="extraction"] - Name of the root element.
 * @returns {string} The XML text, one element per line.
 */
function objectToXML(obj, rootName = "extraction") {
  const preparedObj = prepareFieldsForOutput(obj, "xml");
  const rootTag = toXmlTagName(rootName);
  const lines = [`<?xml version="1.0" encoding="UTF-8"?>`, `<${rootTag}>`];

  const convert = (node, indent = " ") => {
    for (const [key, value] of Object.entries(node)) {
      if (value === null || value === undefined) {
        continue;
      }
      // Skip full_text when this node also carries a non-empty pages array.
      if (key === "full_text" && Array.isArray(node.pages) && node.pages.length > 0) {
        continue;
      }

      const tag = toXmlTagName(key);
      if (Array.isArray(value)) {
        for (const item of value) {
          if (item === null || item === undefined) {
            // typeof null === "object": recursing into it would throw.
            continue;
          }
          lines.push(`${indent}<${tag}>`);
          if (typeof item === "object") {
            convert(item, `${indent} `);
          } else {
            lines.push(`${indent} ${escapeXML(String(item))}`);
          }
          lines.push(`${indent}</${tag}>`);
        }
      } else if (typeof value === "object") {
        lines.push(`${indent}<${tag}>`);
        convert(value, `${indent} `);
        lines.push(`${indent}</${tag}>`);
      } else {
        lines.push(`${indent}<${tag}>${escapeXML(String(value))}</${tag}>`);
      }
    }
  };

  if (preparedObj && typeof preparedObj === "object") {
    convert(preparedObj);
  }
  lines.push(`</${rootTag}>`);
  return lines.join("\n");
}
/**
 * Escapes the five XML special characters so a value can be placed safely
 * inside element content or an attribute value.
 *
 * @param {*} str - Value to escape; coerced to a string first.
 * @returns {string} The escaped text.
 */
function escapeXML(str) {
  return String(str)
    // "&" must go first, otherwise the entities produced below get re-escaped.
    .replace(/&/g, "&amp;")
    .replace(/</g, "&lt;")
    .replace(/>/g, "&gt;")
    .replace(/"/g, "&quot;")
    .replace(/'/g, "&apos;");
}
/**
 * Formats one field as indented "key: value" text, recursing into arrays and
 * objects. Object items of an array get an "Item N:" heading; scalar items
 * (including null) are listed with a dash.
 *
 * @returns {string} One or more lines, each terminated by "\n".
 */
function formatFieldAsText(key, value, indent = "") {
  if (Array.isArray(value)) {
    let out = `${indent}${key}:\n`;
    value.forEach((item, idx) => {
      // typeof null === "object"; treat null as a scalar instead of recursing.
      if (typeof item === "object" && item !== null) {
        out += `${indent} Item ${idx + 1}:\n`;
        for (const [k, v] of Object.entries(item)) {
          out += formatFieldAsText(k, v, `${indent} `);
        }
      } else {
        out += `${indent} - ${item}\n`;
      }
    });
    return out;
  }
  if (typeof value === "object" && value !== null) {
    let out = `${indent}${key}:\n`;
    for (const [k, v] of Object.entries(value)) {
      out += formatFieldAsText(k, v, `${indent} `);
    }
    return out;
  }
  return `${indent}${key}: ${value}\n`;
}

/**
 * Renders extracted fields as human-readable plain text.
 *
 * When `full_text` is present it is shown first. A per-page breakdown from
 * `pages` follows only if `full_text` does not already contain page markers
 * ("=== PAGE" or "--- Page"). All remaining fields are then listed under a
 * "STRUCTURED FIELDS" heading. Without `full_text`, every field is listed.
 *
 * @param {*} fields - Extracted fields.
 * @returns {string} The formatted text, or "No data extracted." when there
 *   is nothing to show.
 */
function fieldsToText(fields) {
  if (!fields || typeof fields !== "object") {
    return "No data extracted.";
  }

  // Each top-level field is followed by a blank line.
  const formatAll = (source) =>
    Object.entries(source)
      .map(([key, value]) => `${formatFieldAsText(key, value)}\n`)
      .join("");

  if (!fields.full_text) {
    return formatAll(fields).trim() || "No data extracted.";
  }

  // Coerce so a non-string full_text cannot break the marker check below.
  const fullText = String(fields.full_text);
  let text = `=== FULL EXTRACTED TEXT ===\n\n${fullText}`;

  const hasPageMarkers = fullText.includes("=== PAGE") || fullText.includes("--- Page");
  if (!hasPageMarkers && Array.isArray(fields.pages)) {
    text += "\n\n=== TEXT BY PAGE ===\n\n";
    fields.pages.forEach((page, idx) => {
      text += `--- Page ${page?.page_number || idx + 1} ---\n`;
      text += page?.text || "";
      text += "\n\n";
    });
  }

  const otherFields = { ...fields };
  delete otherFields.full_text;
  delete otherFields.pages;
  if (Object.keys(otherFields).length > 0) {
    text += "\n\n=== STRUCTURED FIELDS ===\n\n";
    text += formatAll(otherFields);
  }

  return text.trim();
}
| export default function ExtractionOutput({ hasFile, isProcessing, isComplete, extractionResult }) { | |
| const [activeTab, setActiveTab] = useState("json"); | |
| const [copied, setCopied] = useState(false); | |
| // Get fields from extraction result, default to empty object | |
| const fields = extractionResult?.fields || {}; | |
| const confidence = extractionResult?.confidence || 0; | |
| const fieldsExtracted = extractionResult?.fieldsExtracted || 0; | |
| const totalTime = extractionResult?.totalTime || 0; | |
| // Initialize expanded sections based on available fields | |
| const [expandedSections, setExpandedSections] = useState(() => | |
| Object.keys(fields).slice(0, 5) // Expand first 5 sections by default | |
| ); | |
| const handleCopy = () => { | |
| let content = ""; | |
| if (activeTab === "json") { | |
| const preparedFields = prepareFieldsForOutput(fields, "json"); | |
| content = JSON.stringify(preparedFields, null, 2); | |
| } else if (activeTab === "xml") { | |
| content = objectToXML(fields); | |
| } else { | |
| content = fieldsToText(fields); | |
| } | |
| navigator.clipboard.writeText(content); | |
| setCopied(true); | |
| setTimeout(() => setCopied(false), 2000); | |
| }; | |
| // Get prepared fields for display | |
| const preparedFields = React.useMemo(() => { | |
| return prepareFieldsForOutput(fields, "json"); | |
| }, [fields]); | |
| // Update expanded sections when fields change | |
| React.useEffect(() => { | |
| if (extractionResult?.fields) { | |
| setExpandedSections(Object.keys(extractionResult.fields).slice(0, 5)); | |
| } | |
| }, [extractionResult]); | |
| const toggleSection = (section) => { | |
| setExpandedSections((prev) => | |
| prev.includes(section) ? prev.filter((s) => s !== section) : [...prev, section] | |
| ); | |
| }; | |
| const renderValue = (value) => { | |
| if (typeof value === "number") { | |
| return <span className="text-amber-600">{value}</span>; | |
| } | |
| if (typeof value === "string") { | |
| return <span className="text-emerald-600">"{value}"</span>; | |
| } | |
| return String(value); | |
| }; | |
| const renderSection = (key, value, level = 0) => { | |
| const isExpanded = expandedSections.includes(key); | |
| const isObject = typeof value === "object" && value !== null; | |
| const isArray = Array.isArray(value); | |
| if (!isObject) { | |
| return ( | |
| <div | |
| key={key} | |
| className="flex items-start gap-2 py-1" | |
| style={{ paddingLeft: level * 16 }} | |
| > | |
| <span className="text-violet-500">"{key}"</span> | |
| <span className="text-slate-400">:</span> | |
| {renderValue(value)} | |
| </div> | |
| ); | |
| } | |
| return ( | |
| <div key={key}> | |
| <button | |
| onClick={() => toggleSection(key)} | |
| className="flex items-center gap-2 py-1 hover:bg-slate-50 w-full text-left rounded" | |
| style={{ paddingLeft: level * 16 }} | |
| > | |
| <ChevronDown | |
| className={cn( | |
| "h-3 w-3 text-slate-400 transition-transform", | |
| !isExpanded && "-rotate-90" | |
| )} | |
| /> | |
| <span className="text-violet-500">"{key}"</span> | |
| <span className="text-slate-400">:</span> | |
| <span className="text-slate-400">{isArray ? "[" : "{"}</span> | |
| {!isExpanded && ( | |
| <span className="text-slate-300 text-xs"> | |
| {isArray ? `${value.length} items` : `${Object.keys(value).length} fields`} | |
| </span> | |
| )} | |
| </button> | |
| <AnimatePresence> | |
| {isExpanded && ( | |
| <motion.div | |
| initial={{ height: 0, opacity: 0 }} | |
| animate={{ height: "auto", opacity: 1 }} | |
| exit={{ height: 0, opacity: 0 }} | |
| transition={{ duration: 0.2 }} | |
| className="overflow-hidden" | |
| > | |
| {isArray ? ( | |
| value.map((item, idx) => ( | |
| <div key={idx} className="border-l border-slate-100 ml-4"> | |
| {Object.entries(item).map(([k, v]) => renderSection(k, v, level + 2))} | |
| {idx < value.length - 1 && <div className="h-2" />} | |
| </div> | |
| )) | |
| ) : ( | |
| Object.entries(value).map(([k, v]) => renderSection(k, v, level + 1)) | |
| )} | |
| <div style={{ paddingLeft: level * 16 }} className="text-slate-400"> | |
| {isArray ? "]" : "}"} | |
| </div> | |
| </motion.div> | |
| )} | |
| </AnimatePresence> | |
| </div> | |
| ); | |
| }; | |
| return ( | |
| <div className="h-full flex flex-col bg-white rounded-2xl border border-slate-200 overflow-hidden"> | |
| {/* Header */} | |
| <div className="flex items-center justify-between px-5 py-4 border-b border-slate-100"> | |
| <div className="flex items-center gap-3"> | |
| <div className="h-8 w-8 rounded-lg bg-emerald-50 flex items-center justify-center"> | |
| <Code2 className="h-4 w-4 text-emerald-600" /> | |
| </div> | |
| <div> | |
| <h3 className="font-semibold text-slate-800 text-sm">Extracted Data</h3> | |
| <p className="text-xs text-slate-400"> | |
| {isComplete | |
| ? `${fieldsExtracted} field${fieldsExtracted !== 1 ? 's' : ''} extracted` | |
| : "Waiting for extraction"} | |
| </p> | |
| </div> | |
| </div> | |
| {isComplete && ( | |
| <div className="flex items-center gap-2"> | |
| <Tabs value={activeTab} onValueChange={setActiveTab}> | |
| <TabsList className="h-8 bg-slate-100 p-0.5"> | |
| <TabsTrigger value="text" className="h-7 text-xs gap-1.5"> | |
| <FileText className="h-3 w-3" /> | |
| Text | |
| </TabsTrigger> | |
| <TabsTrigger value="json" className="h-7 text-xs gap-1.5"> | |
| <Braces className="h-3 w-3" /> | |
| JSON | |
| </TabsTrigger> | |
| <TabsTrigger value="xml" className="h-7 text-xs gap-1.5"> | |
| <FileCode2 className="h-3 w-3" /> | |
| XML | |
| </TabsTrigger> | |
| </TabsList> | |
| </Tabs> | |
| <Button | |
| variant="ghost" | |
| size="sm" | |
| onClick={handleCopy} | |
| className="h-8 text-xs gap-1.5" | |
| > | |
| {copied ? ( | |
| <> | |
| <Check className="h-3 w-3 text-emerald-500" /> | |
| Copied | |
| </> | |
| ) : ( | |
| <> | |
| <Copy className="h-3 w-3" /> | |
| Copy | |
| </> | |
| )} | |
| </Button> | |
| </div> | |
| )} | |
| </div> | |
| {/* Output Area */} | |
| <div className="flex-1 overflow-auto"> | |
| {!hasFile ? ( | |
| <div className="h-full flex items-center justify-center p-6"> | |
| <div className="text-center"> | |
| <div className="h-20 w-20 mx-auto rounded-2xl bg-slate-100 flex items-center justify-center mb-4"> | |
| <Code2 className="h-10 w-10 text-slate-300" /> | |
| </div> | |
| <p className="text-slate-400 text-sm">Extracted data will appear here</p> | |
| </div> | |
| </div> | |
| ) : isProcessing ? ( | |
| <div className="h-full flex items-center justify-center p-6"> | |
| <div className="text-center"> | |
| <motion.div | |
| animate={{ rotate: 360 }} | |
| transition={{ duration: 2, repeat: Infinity, ease: "linear" }} | |
| className="h-16 w-16 mx-auto rounded-2xl bg-gradient-to-br from-indigo-100 to-violet-100 flex items-center justify-center mb-4" | |
| > | |
| <Sparkles className="h-8 w-8 text-indigo-500" /> | |
| </motion.div> | |
| <p className="text-slate-700 font-medium mb-1">Extracting data...</p> | |
| <p className="text-slate-400 text-sm">Analyzing document structure</p> | |
| <div className="mt-6 flex items-center justify-center gap-1"> | |
| {[0, 1, 2].map((i) => ( | |
| <motion.div | |
| key={i} | |
| animate={{ scale: [1, 1.2, 1] }} | |
| transition={{ | |
| duration: 0.6, | |
| repeat: Infinity, | |
| delay: i * 0.2, | |
| }} | |
| className="h-2 w-2 rounded-full bg-indigo-400" | |
| /> | |
| ))} | |
| </div> | |
| </div> | |
| </div> | |
| ) : isComplete && Object.keys(fields).length === 0 ? ( | |
| <div className="h-full flex items-center justify-center p-6"> | |
| <div className="text-center"> | |
| <div className="h-20 w-20 mx-auto rounded-2xl bg-amber-100 flex items-center justify-center mb-4"> | |
| <Code2 className="h-10 w-10 text-amber-600" /> | |
| </div> | |
| <p className="text-slate-600 font-medium mb-1">No data extracted</p> | |
| <p className="text-slate-400 text-sm">The document may not contain extractable fields</p> | |
| </div> | |
| </div> | |
| ) : ( | |
| <div className="p-4 font-mono text-sm"> | |
| {activeTab === "text" ? ( | |
| <pre className="text-sm text-slate-700 whitespace-pre-wrap leading-relaxed"> | |
| {fieldsToText(fields)} | |
| </pre> | |
| ) : activeTab === "json" ? ( | |
| <div className="space-y-1"> | |
| <span className="text-slate-400">{"{"}</span> | |
| {Object.keys(preparedFields).length > 0 ? ( | |
| Object.entries(preparedFields).map(([key, value]) => | |
| renderSection(key, value, 1) | |
| ) | |
| ) : ( | |
| <div className="pl-4 text-slate-400 italic">No fields extracted</div> | |
| )} | |
| <span className="text-slate-400">{"}"}</span> | |
| </div> | |
| ) : ( | |
| <pre className="text-sm text-slate-600 whitespace-pre-wrap"> | |
| {objectToXML(fields).split("\n").map((line, i) => ( | |
| <div key={i} className="hover:bg-slate-50 px-2 -mx-2 rounded"> | |
| {line.includes("<") ? ( | |
| <> | |
| {line.split(/(<\/?[\w\s=".-]+>)/g).map((part, j) => { | |
| if (part.startsWith("</")) { | |
| return ( | |
| <span key={j} className="text-rose-500"> | |
| {part} | |
| </span> | |
| ); | |
| } | |
| if (part.startsWith("<")) { | |
| return ( | |
| <span key={j} className="text-indigo-500"> | |
| {part} | |
| </span> | |
| ); | |
| } | |
| return ( | |
| <span key={j} className="text-slate-700"> | |
| {part} | |
| </span> | |
| ); | |
| })} | |
| </> | |
| ) : ( | |
| line | |
| )} | |
| </div> | |
| ))} | |
| </pre> | |
| )} | |
| </div> | |
| )} | |
| </div> | |
| {/* Confidence Footer */} | |
| {isComplete && extractionResult && ( | |
| <div className="px-5 py-3 border-t border-slate-100 bg-slate-50/50"> | |
| <div className="flex items-center justify-between text-xs"> | |
| <div className="flex items-center gap-4"> | |
| <div className="flex items-center gap-1.5"> | |
| <div className={cn( | |
| "h-2 w-2 rounded-full", | |
| confidence >= 90 ? "bg-emerald-500" : confidence >= 70 ? "bg-amber-500" : "bg-red-500" | |
| )} /> | |
| <span className="text-slate-500">Confidence:</span> | |
| <span className="font-semibold text-slate-700"> | |
| {confidence > 0 ? `${confidence.toFixed(1)}%` : "N/A"} | |
| </span> | |
| </div> | |
| <div className="flex items-center gap-1.5"> | |
| <span className="text-slate-500">Fields:</span> | |
| <span className="font-semibold text-slate-700">{fieldsExtracted}</span> | |
| </div> | |
| </div> | |
| <span className="text-slate-400"> | |
| Processed in {totalTime >= 1000 ? `${(totalTime / 1000).toFixed(1)}s` : `${totalTime}ms`} | |
| </span> | |
| </div> | |
| </div> | |
| )} | |
| </div> | |
| ); | |
| } | |