| | import React, { useState, useEffect, useRef } from "react"; |
| | import { motion, AnimatePresence } from "framer-motion"; |
| | import { |
| | Code2, |
| | Copy, |
| | Check, |
| | Braces, |
| | FileCode2, |
| | FileText, |
| | Sparkles, |
| | ChevronDown, |
| | Upload, |
| | } from "lucide-react"; |
| | import { Button } from "@/components/ui/button"; |
| | import { Tabs, TabsList, TabsTrigger } from "@/components/ui/tabs"; |
| | import { cn } from "@/lib/utils"; |
| |
|
| | |
/**
 * Converts Markdown-style pipe tables found in `text` into HTML `<table>` markup.
 *
 * A table starts at any line that contains at least two `|` characters, is not a
 * separator row (only whitespace, `|`, `-`, `:`), and yields at least two cells.
 * The line right after the header is skipped when it is a separator row. Following
 * lines are consumed as body rows until a blank line or a line without `|`.
 * Every cell is passed through `escapeHtml`. All other lines are returned untouched.
 *
 * Body rows that have fewer cells than the header are padded with empty cells so
 * the rendered grid stays rectangular; longer rows are kept as-is.
 *
 * @param {string} text - Raw text that may contain pipe tables.
 * @returns {string} The text with pipe tables replaced by HTML tables (falsy input is returned as-is).
 */
function convertPipeTablesToHTML(text) {
  if (!text) return text;

  const SEPARATOR_ROW = /^[\s|\-:]+$/;
  const TABLE_OPEN = '<table class="border-collapse border border-gray-300 w-full my-4">\n<thead>\n<tr>';

  // True for rows such as `|---|:--:|` that only delimit header and body.
  const isSeparatorRow = (line) => SEPARATOR_ROW.test(line.trim());

  // Splits a row on `|`, trims the cells and drops the empty edge cells
  // produced by a leading / trailing pipe.
  const splitCells = (line) => {
    const cells = line.split('|').map((cell) => cell.trim());
    if (cells.length > 0 && !cells[0]) cells.shift();
    if (cells.length > 0 && !cells[cells.length - 1]) cells.pop();
    return cells;
  };

  // Serializes the collected rows (header first) into table markup.
  const renderTable = ([headerCells, ...bodyRows]) => {
    const head = headerCells
      .map((cell) => `<th class="border border-gray-300 px-4 py-2 bg-gray-100 font-semibold text-left">${escapeHtml(cell)}</th>`)
      .join('');

    const body = bodyRows
      .map((row) => {
        // Pad short rows so every row has at least as many cells as the header.
        const missing = Math.max(0, headerCells.length - row.length);
        const cells = [...row, ...new Array(missing).fill('')];
        const tds = cells
          .map((cell) => `<td class="border border-gray-300 px-4 py-2">${escapeHtml(cell || '')}</td>`)
          .join('');
        return `<tr>${tds}</tr>\n`;
      })
      .join('');

    return `${TABLE_OPEN}${head}</tr>\n</thead>\n<tbody>\n${body}</tbody>\n</table>`;
  };

  const lines = text.split('\n');
  const result = [];
  let i = 0;

  while (i < lines.length) {
    const line = lines[i];

    // Three or more segments means the line holds at least two pipes.
    const looksLikeHeader = line.split('|').length >= 3 && !isSeparatorRow(line);
    const headerCells = looksLikeHeader ? splitCells(line) : [];

    if (headerCells.length < 2) {
      result.push(line);
      i++;
      continue;
    }

    const tableRows = [headerCells];
    let j = i + 1;

    // Optional `|---|---|` row directly under the header.
    if (j < lines.length && isSeparatorRow(lines[j])) {
      j++;
    }

    while (j < lines.length) {
      const rowLine = lines[j];
      if (!rowLine.trim() || !rowLine.includes('|')) break;
      // Separator rows inside the body are skipped rather than rendered.
      if (!isSeparatorRow(rowLine)) {
        tableRows.push(splitCells(rowLine));
      }
      j++;
    }

    result.push(renderTable(tableRows));
    i = j;
  }

  return result.join('\n');
}
| |
|
| | |
/**
 * Escapes text for safe use as HTML element content.
 *
 * Produces the same result as assigning the value to `textContent` and reading
 * `innerHTML` back (`&`, `<`, `>` and the non-breaking space are replaced by
 * entities), but without touching the DOM, so it also works where `document`
 * is not available and avoids creating an element per call.
 * Quotes are not escaped, so the result is not suitable for attribute values.
 *
 * @param {*} text - Value to escape; falsy values yield an empty string.
 * @returns {string} The escaped text.
 */
function escapeHtml(text) {
  if (!text) return '';
  const entities = {
    '&': '&amp;',
    '<': '&lt;',
    '>': '&gt;',
    '\u00A0': '&nbsp;',
  };
  return String(text).replace(/[&<>\u00A0]/g, (ch) => entities[ch]);
}
| |
|
| | |
/**
 * Renders a small Markdown subset to an HTML string.
 *
 * Pipeline, in order:
 *  1. pipe tables -> `<table>` (via `convertPipeTablesToHTML`);
 *  2. `$^{x}$` / `$^x$` -> `<sup>`, `$_{x}$` / `$_x$` -> `<sub>`;
 *  3. every `<table>…</table>` is swapped for a placeholder so the rules below cannot touch it;
 *  4. `#`, `##`, `###` headings, `**bold**`, `*italic*`, `[label](url)` links;
 *  5. blank lines become paragraph breaks and single newlines become `<br>` (outside tables);
 *  6. tables are restored, sup/sub notation inside cells is converted, empty paragraphs are removed.
 *
 * Links are only emitted for relative URLs and for `http`, `https`, `mailto` and `tel`
 * schemes, and the URL is attribute-escaped; anything else is left as literal text.
 *
 * NOTE(review): apart from table cells produced from pipe tables, the input is NOT
 * HTML-escaped (raw `<table>` markup in the input is deliberately preserved). The
 * result must therefore only be injected into the DOM when the text is trusted or
 * has been sanitized by the caller.
 *
 * @param {string} text - Markdown-like source text.
 * @returns {string} HTML markup, or an empty string for falsy input.
 */
function renderMarkdownToHTML(text) {
  if (!text) return "";

  const ALLOWED_LINK_SCHEMES = ['http', 'https', 'mailto', 'tel'];

  // Browsers ignore control characters and whitespace while parsing a URL scheme,
  // so they are stripped before the scheme is inspected.
  const isSafeLinkTarget = (url) => {
    const compact = url.replace(/[\u0000-\u0020\u007F]+/g, '');
    const scheme = /^([a-z][a-z0-9+.-]*):/i.exec(compact);
    return !scheme || ALLOWED_LINK_SCHEMES.includes(scheme[1].toLowerCase());
  };

  const escapeAttribute = (value) =>
    value
      .replace(/&/g, '&amp;')
      .replace(/"/g, '&quot;')
      .replace(/</g, '&lt;')
      .replace(/>/g, '&gt;');

  let html = convertPipeTablesToHTML(text);

  // Superscript / subscript notation: braced form first, then the bare form.
  html = html.replace(/\$\s*\^\s*\{([^}]+)\}\s*\$/g, '<sup>$1</sup>');
  html = html.replace(/\$\s*\^\s*([^\s$<>]+)\s*\$/g, '<sup>$1</sup>');
  html = html.replace(/\$\s*_\s*\{([^}]+)\}\s*\$/g, '<sub>$1</sub>');
  html = html.replace(/\$\s*_\s*([^\s$<>]+)\s*\$/g, '<sub>$1</sub>');

  // Park tables behind placeholders so inline rules and paragraph handling skip them.
  const htmlBlocks = [];
  html = html.replace(/<table[\s\S]*?<\/table>/gi, (match) => {
    htmlBlocks.push(match);
    return `__HTML_BLOCK_${htmlBlocks.length - 1}__`;
  });

  // Headings (most specific first so `###` is not consumed by the `#` rule).
  html = html.replace(/^### (.*$)/gim, '<h3>$1</h3>');
  html = html.replace(/^## (.*$)/gim, '<h2>$1</h2>');
  html = html.replace(/^# (.*$)/gim, '<h1>$1</h1>');

  // Bold before italic, otherwise `**x**` would be read as two italic markers.
  html = html.replace(/\*\*(.*?)\*\*/g, '<strong>$1</strong>');
  html = html.replace(/\*(.*?)\*/g, '<em>$1</em>');

  // Links: unsafe targets are left as the original literal text.
  html = html.replace(/\[([^\]]+)\]\(([^)]+)\)/g, (match, label, url) => {
    if (!isSafeLinkTarget(url)) return match;
    return `<a href="${escapeAttribute(url)}" target="_blank" rel="noopener noreferrer">${label}</a>`;
  });

  const parts = html.split(/(__HTML_BLOCK_\d+__)/);
  const processedParts = parts.map((part) => {
    if (/^__HTML_BLOCK_\d+__$/.test(part)) {
      const blockIndex = Number.parseInt(part.match(/\d+/)[0], 10);
      // A placeholder-looking token that was typed in the source text has no stored block.
      return htmlBlocks[blockIndex] ?? part;
    }

    let processed = part;
    processed = processed.replace(/\n\n+/g, '</p><p>');
    processed = processed.replace(/([^\n>])\n([^\n<])/g, '$1<br>$2');

    // Only wrap fragments that do not already begin with markup.
    if (processed.trim() && !processed.trim().startsWith('<')) {
      processed = '<p>' + processed + '</p>';
    }

    return processed;
  });

  html = processedParts.join('');

  // Sup/sub notation that sits inside table cells.
  const cellScriptRules = [
    [/(<td[^>]*>|<th[^>]*>)([^<]*)\$\s*\^\s*\{([^}]+)\}\s*\$([^<]*)(<\/td>|<\/th>)/gi, 'sup'],
    [/(<td[^>]*>|<th[^>]*>)([^<]*)\$\s*\^\s*([^\s$<>]+)\s*\$([^<]*)(<\/td>|<\/th>)/gi, 'sup'],
    [/(<td[^>]*>|<th[^>]*>)([^<]*)\$\s*_\s*\{([^}]+)\}\s*\$([^<]*)(<\/td>|<\/th>)/gi, 'sub'],
    [/(<td[^>]*>|<th[^>]*>)([^<]*)\$\s*_\s*([^\s$<>]+)\s*\$([^<]*)(<\/td>|<\/th>)/gi, 'sub'],
  ];
  for (const [pattern, tag] of cellScriptRules) {
    html = html.replace(
      pattern,
      (match, openTag, before, inner, after, closeTag) =>
        openTag + before + '<' + tag + '>' + inner + '</' + tag + '>' + after + closeTag
    );
  }

  // Drop empty paragraphs left behind by the paragraph splitting.
  html = html.replace(/<p><\/p>/g, '');
  html = html.replace(/<p>\s*<br>\s*<\/p>/g, '');
  html = html.replace(/<p>\s*<\/p>/g, '');

  // Normalize the seams between tables, headings and paragraphs.
  html = html.replace(/(<\/table>)\s*(<h[1-3])/g, '$1</p><p>$2');
  html = html.replace(/(<\/h[1-3]>)\s*(<table)/g, '$1<p>$2');
  html = html.replace(/(<\/table>)\s*(<p>)/g, '$1$2');

  return html;
}
| |
|
| | |
// Sample structured extraction result for an invoice: document type/confidence,
// vendor, invoice header, line items and totals.
// NOTE(review): nothing in the visible part of this file reads `mockData` —
// presumably a leftover fixture; confirm before removing.
const mockData = {
  document: {
    type: "Invoice",
    confidence: 0.98,
  },
  vendor: {
    name: "Acme Corporation",
    address: "123 Business Ave, Suite 400",
    city: "San Francisco",
    state: "CA",
    zip: "94102",
    phone: "+1 (555) 123-4567",
  },
  invoice: {
    number: "INV-2024-0847",
    date: "2024-01-15",
    due_date: "2024-02-14",
    po_number: "PO-9823",
  },
  items: [
    { description: "Professional Services", quantity: 40, unit_price: 150.0, total: 6000.0 },
    { description: "Software License", quantity: 5, unit_price: 299.99, total: 1499.95 },
    { description: "Support Package", quantity: 1, unit_price: 500.0, total: 500.0 },
  ],
  totals: {
    subtotal: 7999.95,
    tax_rate: 0.0875,
    tax_amount: 699.99,
    total: 8699.94,
  },
};
| |
|
// Sample XML rendering of an invoice extraction (abbreviated: a single line item).
// NOTE(review): not referenced by any code visible in this file — presumably a
// leftover fixture; confirm before removing.
const mockXML = `<?xml version="1.0" encoding="UTF-8"?>
<extraction>
<document type="Invoice" confidence="0.98"/>
<vendor>
<name>Acme Corporation</name>
<address>123 Business Ave, Suite 400</address>
<city>San Francisco</city>
<state>CA</state>
<zip>94102</zip>
</vendor>
<invoice>
<number>INV-2024-0847</number>
<date>2024-01-15</date>
<due_date>2024-02-14</due_date>
</invoice>
<items>
<item>
<description>Professional Services</description>
<quantity>40</quantity>
<total>6000.00</total>
</item>
</items>
<totals>
<subtotal>7999.95</subtotal>
<tax>699.99</tax>
<total>8699.94</total>
</totals>
</extraction>`;
| |
|
// Sample plain-text rendering of the same invoice (header, bill-to, item rows, totals).
// NOTE(review): not referenced by any code visible in this file — presumably a
// leftover fixture; confirm before removing.
const mockText = `INVOICE

ACME CORPORATION
123 Business Ave, Suite 400
San Francisco, CA 94102
Phone: +1 (555) 123-4567

Invoice Number: INV-2024-0847
Invoice Date: January 15, 2024
Due Date: February 14, 2024
PO Number: PO-9823

BILL TO:
Customer Name
456 Client Street
New York, NY 10001

ITEMS:
─────────────────────────────────────────────────────────
Description Qty Unit Price Total
─────────────────────────────────────────────────────────
Professional Services 40 $150.00 $6,000.00
Software License 5 $299.99 $1,499.95
Support Package 1 $500.00 $500.00
─────────────────────────────────────────────────────────

Subtotal: $7,999.95
Tax (8.75%): $699.99
─────────────────────────
TOTAL: $8,699.94

Payment Terms: Net 30
Thank you for your business!`;
| |
|
| | |
| | |
/**
 * Builds the object that is shown/exported for an extraction result.
 *
 * - A root `Fields` entry is lifted out and, when it is a non-empty object, placed
 *   first in the result.
 * - When `pages` is a non-empty array, the top-level `full_text` and every
 *   `page.fields.full_text` are dropped.
 * - For the "json" and "xml" formats the `pages` array is flattened into
 *   `page_<n>` entries (`n` = `page.page_number` or the 1-based index). Each entry
 *   carries `text`, `confidence`, `doc_type`, non-empty `table` / `footer_notes`,
 *   and the page fields that are not identical duplicates of a top-level value.
 * - `Fields` and `metadata` are removed from every `page_*` entry.
 *
 * The input is never modified: nested objects that need changes are copied first.
 *
 * @param {object} fields - Raw extraction fields.
 * @param {string} [format="json"] - Target format; only "json" and "xml" flatten `pages`.
 * @returns {object} A new object ready for output (non-object input is returned unchanged).
 */
function prepareFieldsForOutput(fields, format = "json") {
  if (!fields || typeof fields !== "object") {
    return fields;
  }

  // Shallow copy without the root `Fields`, which is re-added first at the end.
  const { Fields: rootFields, ...output } = fields;

  if (Array.isArray(output.pages) && output.pages.length > 0) {
    // Page texts supersede the combined text.
    delete output.full_text;

    output.pages = output.pages.map((page) => {
      const cleanedPage = { ...page };
      if (cleanedPage.fields && typeof cleanedPage.fields === "object") {
        const cleanedFields = { ...cleanedPage.fields };
        delete cleanedFields.full_text;
        cleanedPage.fields = cleanedFields;
      }
      return cleanedPage;
    });
  }

  const flattensPages = format === "json" || format === "xml";
  if (flattensPages && Array.isArray(output.pages)) {
    // Keys that already exist at the top level; used to de-duplicate page fields.
    const topLevelKeys = new Set(
      Object.keys(output).filter((k) => k !== "pages" && k !== "full_text")
    );

    output.pages.forEach((page, idx) => {
      const pageNum = page.page_number || idx + 1;
      const pageFields = page.fields || {};

      // Keep a page field unless the very same value is already present at the top level.
      const cleanedPageFields = {};
      for (const [key, value] of Object.entries(pageFields)) {
        if (key === "full_text") continue;
        if (!topLevelKeys.has(key) || value !== output[key]) {
          cleanedPageFields[key] = value;
        }
      }

      const pageObj = {
        text: page.text || "",
        confidence: page.confidence || 0,
        doc_type: page.doc_type || "other",
      };

      if (Array.isArray(page.table) && page.table.length > 0) {
        pageObj.table = page.table;
      }
      if (Array.isArray(page.footer_notes) && page.footer_notes.length > 0) {
        pageObj.footer_notes = page.footer_notes;
      }
      if (Object.keys(cleanedPageFields).length > 0) {
        pageObj.fields = cleanedPageFields;
      }

      output[`page_${pageNum}`] = pageObj;
    });

    delete output.pages;
  }

  // Strip per-page `Fields` / `metadata`. Entries that came straight from the input
  // are still the caller's objects, so strip a copy instead of deleting in place.
  for (const pageKey of Object.keys(output)) {
    if (!pageKey.startsWith("page_")) continue;
    const pageData = output[pageKey];
    if (pageData && typeof pageData === "object" && !Array.isArray(pageData)) {
      const pageCopy = { ...pageData };
      delete pageCopy.Fields;
      delete pageCopy.metadata;
      output[pageKey] = pageCopy;
    }
  }

  const hasRootFields =
    rootFields && typeof rootFields === "object" && Object.keys(rootFields).length > 0;

  // `Fields` goes first so it leads the rendered output.
  return hasRootFields ? { Fields: rootFields, ...output } : { ...output };
}
| |
|
/**
 * Serializes extraction fields to an XML document string.
 *
 * The input is first normalized with `prepareFieldsForOutput(obj, "xml")`. Every
 * key becomes an element; arrays repeat the element once per item; nested objects
 * recurse with one more space of indentation; leaf values are escaped with
 * `escapeXML`. `null` / `undefined` values and array items are skipped.
 *
 * Keys are turned into well-formed element names: characters other than letters,
 * ASCII digits, `_`, `.` and `-` are replaced with `_`, and a `_` is prefixed when
 * the name would not start with a letter or underscore (e.g. "Invoice Number" ->
 * `Invoice_Number`, "2nd" -> `_2nd`).
 *
 * @param {object} obj - Extraction fields.
 * @param {string} [rootName="extraction"] - Name of the root element (used verbatim).
 * @returns {string} The XML document.
 */
function objectToXML(obj, rootName = "extraction") {
  const preparedObj = prepareFieldsForOutput(obj, "xml");

  // Maps an arbitrary object key to a name that is valid as an XML element name.
  const toElementName = (key) => {
    const cleaned = String(key).replace(/[^\p{L}0-9_.-]/gu, "_");
    return /^[\p{L}_]/u.test(cleaned) ? cleaned : `_${cleaned}`;
  };

  let xml = `<?xml version="1.0" encoding="UTF-8"?>\n<${rootName}>\n`;

  const convert = (node, indent = " ") => {
    for (const [key, value] of Object.entries(node)) {
      if (value === null || value === undefined) continue;

      // The combined text is redundant when the same node also carries pages.
      if (key === "full_text" && Array.isArray(node.pages) && node.pages.length > 0) {
        continue;
      }

      const tag = toElementName(key);

      if (Array.isArray(value)) {
        for (const item of value) {
          // `typeof null` is "object"; recursing into it would throw.
          if (item === null || item === undefined) continue;
          xml += `${indent}<${tag}>\n`;
          if (typeof item === "object") {
            convert(item, indent + " ");
          } else {
            xml += `${indent} ${escapeXML(String(item))}\n`;
          }
          xml += `${indent}</${tag}>\n`;
        }
      } else if (typeof value === "object") {
        xml += `${indent}<${tag}>\n`;
        convert(value, indent + " ");
        xml += `${indent}</${tag}>\n`;
      } else {
        xml += `${indent}<${tag}>${escapeXML(String(value))}</${tag}>\n`;
      }
    }
  };

  // Non-object input (e.g. null) produces an empty root element instead of throwing.
  if (preparedObj && typeof preparedObj === "object") {
    convert(preparedObj);
  }

  xml += `</${rootName}>`;
  return xml;
}
| |
|
/**
 * Escapes the five XML special characters (`&`, `<`, `>`, `"`, `'`) with their
 * predefined entities so the string can be used as element content or inside a
 * quoted attribute value.
 *
 * @param {string} str - Text to escape.
 * @returns {string} The escaped text.
 */
function escapeXML(str) {
  const entities = {
    "&": "&amp;",
    "<": "&lt;",
    ">": "&gt;",
    '"': "&quot;",
    "'": "&apos;",
  };
  // Single pass, so an ampersand introduced by one replacement is never escaped twice.
  return str.replace(/[&<>"']/g, (ch) => entities[ch]);
}
| |
|
| | |
/**
 * Pulls the readable document text out of an extraction-fields object.
 *
 * Page entries (keys beginning with "page_") win: their non-empty `text` values
 * are joined with a blank line, in key order. When no page contributes text the
 * top-level `full_text` is used, and an empty string is the final fallback.
 *
 * @param {object} fields - Extraction fields.
 * @returns {string} The document text, or "" when there is none.
 */
function extractTextFromFields(fields) {
  if (!fields || typeof fields !== "object") {
    return "";
  }

  const collected = [];
  for (const name of Object.keys(fields)) {
    if (!name.startsWith("page_")) continue;
    const entry = fields[name];
    if (entry && entry.text) {
      collected.push(entry.text);
    }
  }

  if (collected.length > 0) {
    return collected.join("\n\n");
  }

  return fields.full_text ? fields.full_text : "";
}
| |
|
| | |
/**
 * Produces the text shown for an extraction result.
 *
 * When `extractTextFromFields` finds document text, that text is returned as-is.
 * Otherwise every field is dumped as an indented `key: value` outline: arrays list
 * primitives as `- value` and objects as `Item <n>:` followed by their entries;
 * nested objects are indented by one more space. Top-level entries are separated
 * by a blank line.
 *
 * @param {object} fields - Extraction fields.
 * @returns {string} Display text, or "No data extracted." when nothing can be shown.
 */
function fieldsToText(fields) {
  if (!fields || typeof fields !== "object") {
    return "No data extracted.";
  }

  const extractedText = extractTextFromFields(fields);
  if (extractedText) {
    return extractedText;
  }

  let text = "";

  // Appends one key/value pair (recursively) to `text`.
  const formatValue = (key, value, indent = "") => {
    if (Array.isArray(value)) {
      text += `${indent}${key}:\n`;
      value.forEach((item, idx) => {
        // `typeof null` is "object"; null items are printed like primitives.
        if (typeof item === "object" && item !== null) {
          text += `${indent} Item ${idx + 1}:\n`;
          Object.entries(item).forEach(([k, v]) => formatValue(k, v, indent + " "));
        } else {
          text += `${indent} - ${item}\n`;
        }
      });
    } else if (typeof value === "object" && value !== null) {
      text += `${indent}${key}:\n`;
      Object.entries(value).forEach(([k, v]) => formatValue(k, v, indent + " "));
    } else {
      text += `${indent}${key}: ${value}\n`;
    }
  };

  Object.entries(fields).forEach(([key, value]) => {
    formatValue(key, value);
    text += "\n";
  });

  return text.trim() || "No data extracted.";
}
| |
|
| | export default function ExtractionOutput({ hasFile, isProcessing, isComplete, extractionResult, onNewUpload }) { |
| | const [activeTab, setActiveTab] = useState("json"); |
| | const [copied, setCopied] = useState(false); |
| | const [statusMessage, setStatusMessage] = useState("Preparing document..."); |
| | |
| | |
| | const fields = extractionResult?.fields || {}; |
| | const confidence = extractionResult?.confidence || 0; |
| | const fieldsExtracted = extractionResult?.fieldsExtracted || 0; |
| | const totalTime = extractionResult?.totalTime || 0; |
| |
|
| | |
| | const statusMessages = [ |
| | "Preparing document...", |
| | "Converting pages to images...", |
| | "Visual Reasoning...", |
| | "Reading text from document...", |
| | "Identifying document structure...", |
| | "Extracting tables and data...", |
| | "Analyzing content...", |
| | "Processing pages...", |
| | "Organizing extracted information...", |
| | "Finalizing results...", |
| | ]; |
| |
|
| | |
| | const messageIndexRef = useRef(0); |
| | |
| | useEffect(() => { |
| | if (!isProcessing) { |
| | setStatusMessage("Analyzing document structure"); |
| | messageIndexRef.current = 0; |
| | return; |
| | } |
| |
|
| | setStatusMessage(statusMessages[0]); |
| | messageIndexRef.current = 0; |
| |
|
| | const interval = setInterval(() => { |
| | messageIndexRef.current = (messageIndexRef.current + 1) % statusMessages.length; |
| | setStatusMessage(statusMessages[messageIndexRef.current]); |
| | }, 2500); |
| |
|
| | return () => clearInterval(interval); |
| | }, [isProcessing]); |
| | |
| | |
| | const [expandedSections, setExpandedSections] = useState(() => |
| | Object.keys(fields).slice(0, 5) |
| | ); |
| |
|
| | |
| | const htmlToFormattedText = (html) => { |
| | if (!html) return ""; |
| | |
| | |
| | const tempDiv = document.createElement("div"); |
| | tempDiv.innerHTML = html; |
| | |
| | let text = ""; |
| | |
| | |
| | const processNode = (node) => { |
| | if (node.nodeType === Node.TEXT_NODE) { |
| | return node.textContent; |
| | } |
| | |
| | if (node.nodeType !== Node.ELEMENT_NODE) { |
| | return ""; |
| | } |
| | |
| | const tagName = node.tagName?.toLowerCase(); |
| | const children = Array.from(node.childNodes); |
| | |
| | switch (tagName) { |
| | case "h1": |
| | return "\n\n" + processChildren(children).trim() + "\n\n"; |
| | case "h2": |
| | return "\n\n" + processChildren(children).trim() + "\n\n"; |
| | case "h3": |
| | return "\n" + processChildren(children).trim() + "\n"; |
| | case "p": |
| | return processChildren(children) + "\n\n"; |
| | case "br": |
| | return "\n"; |
| | case "strong": |
| | case "b": |
| | return processChildren(children); |
| | case "em": |
| | case "i": |
| | return processChildren(children); |
| | case "sup": |
| | return processChildren(children); |
| | case "sub": |
| | return processChildren(children); |
| | case "table": |
| | return "\n" + processTable(node) + "\n\n"; |
| | case "ul": |
| | case "ol": |
| | return "\n" + processList(node) + "\n\n"; |
| | case "li": |
| | return " • " + processChildren(children).trim() + "\n"; |
| | default: |
| | return processChildren(children); |
| | } |
| | }; |
| | |
| | const processChildren = (children) => { |
| | return children.map(processNode).join(""); |
| | }; |
| | |
| | const processTable = (table) => { |
| | let tableText = ""; |
| | const rows = table.querySelectorAll("tr"); |
| | |
| | if (rows.length === 0) return ""; |
| | |
| | |
| | const allRows = Array.from(rows); |
| | const columnCount = Math.max(...allRows.map(row => row.querySelectorAll("td, th").length)); |
| | const columnWidths = new Array(columnCount).fill(0); |
| | |
| | allRows.forEach(row => { |
| | const cells = row.querySelectorAll("td, th"); |
| | cells.forEach((cell, colIndex) => { |
| | const cellText = processChildren(Array.from(cell.childNodes)).trim().replace(/\s+/g, " "); |
| | columnWidths[colIndex] = Math.max(columnWidths[colIndex] || 0, cellText.length, 10); |
| | }); |
| | }); |
| | |
| | |
| | allRows.forEach((row, rowIndex) => { |
| | const cells = row.querySelectorAll("td, th"); |
| | const cellTexts = Array.from(cells).map(cell => { |
| | let cellContent = processChildren(Array.from(cell.childNodes)).trim(); |
| | cellContent = cellContent.replace(/\s+/g, " "); |
| | return cellContent; |
| | }); |
| | |
| | |
| | const paddedCells = cellTexts.map((text, i) => { |
| | const width = columnWidths[i] || 10; |
| | return text.padEnd(width); |
| | }); |
| | |
| | tableText += paddedCells.join(" | ") + "\n"; |
| | |
| | |
| | if (rowIndex === 0 && row.querySelector("th")) { |
| | tableText += columnWidths.map(w => "-".repeat(w)).join("-|-") + "\n"; |
| | } |
| | }); |
| | |
| | return tableText; |
| | }; |
| | |
| | const processList = (list) => { |
| | const items = list.querySelectorAll("li"); |
| | return Array.from(items).map(item => { |
| | return " • " + processChildren(Array.from(item.childNodes)).trim(); |
| | }).join("\n"); |
| | }; |
| | |
| | text = processChildren(Array.from(tempDiv.childNodes)); |
| | |
| | |
| | text = text.replace(/\n{3,}/g, "\n\n"); |
| | text = text.trim(); |
| | |
| | return text; |
| | }; |
| |
|
| | const handleCopy = () => { |
| | let content = ""; |
| | if (activeTab === "json") { |
| | const preparedFields = prepareFieldsForOutput(fields, "json"); |
| | content = JSON.stringify(preparedFields, null, 2); |
| | } else if (activeTab === "xml") { |
| | content = objectToXML(fields); |
| | } else { |
| | |
| | const textContent = extractTextFromFields(fields); |
| | const htmlContent = renderMarkdownToHTML(textContent); |
| | content = htmlToFormattedText(htmlContent); |
| | } |
| | |
| | navigator.clipboard.writeText(content); |
| | setCopied(true); |
| | setTimeout(() => setCopied(false), 2000); |
| | }; |
| | |
| | |
| | const preparedFields = React.useMemo(() => { |
| | return prepareFieldsForOutput(fields, "json"); |
| | }, [fields]); |
| | |
| | |
| | React.useEffect(() => { |
| | if (extractionResult?.fields) { |
| | setExpandedSections(Object.keys(extractionResult.fields).slice(0, 5)); |
| | } |
| | }, [extractionResult]); |
| |
|
| | const toggleSection = (section) => { |
| | setExpandedSections((prev) => |
| | prev.includes(section) ? prev.filter((s) => s !== section) : [...prev, section] |
| | ); |
| | }; |
| |
|
| | const renderValue = (value) => { |
| | if (typeof value === "number") { |
| | return <span className="text-amber-600">{value}</span>; |
| | } |
| | if (typeof value === "string") { |
| | return <span className="text-emerald-600">"{value}"</span>; |
| | } |
| | return String(value); |
| | }; |
| |
|
| | const renderSection = (key, value, level = 0) => { |
| | const isExpanded = expandedSections.includes(key); |
| | const isObject = typeof value === "object" && value !== null; |
| | const isArray = Array.isArray(value); |
| |
|
| | if (!isObject) { |
| | return ( |
| | <div |
| | key={key} |
| | className="flex items-start gap-2 py-1" |
| | style={{ paddingLeft: level * 16 }} |
| | > |
| | <span className="text-violet-500">"{key}"</span> |
| | <span className="text-slate-400">:</span> |
| | {renderValue(value)} |
| | </div> |
| | ); |
| | } |
| |
|
| | return ( |
| | <div key={key}> |
| | <button |
| | onClick={() => toggleSection(key)} |
| | className="flex items-center gap-2 py-1 hover:bg-slate-50 w-full text-left rounded" |
| | style={{ paddingLeft: level * 16 }} |
| | > |
| | <ChevronDown |
| | className={cn( |
| | "h-3 w-3 text-slate-400 transition-transform", |
| | !isExpanded && "-rotate-90" |
| | )} |
| | /> |
| | <span className="text-violet-500">"{key}"</span> |
| | <span className="text-slate-400">:</span> |
| | <span className="text-slate-400">{isArray ? "[" : "{"}</span> |
| | {!isExpanded && ( |
| | <span className="text-slate-300 text-xs"> |
| | {isArray ? `${value.length} items` : `${Object.keys(value).length} fields`} |
| | </span> |
| | )} |
| | </button> |
| | <AnimatePresence> |
| | {isExpanded && ( |
| | <motion.div |
| | initial={{ height: 0, opacity: 0 }} |
| | animate={{ height: "auto", opacity: 1 }} |
| | exit={{ height: 0, opacity: 0 }} |
| | transition={{ duration: 0.2 }} |
| | className="overflow-hidden" |
| | > |
| | {isArray ? ( |
| | value.map((item, idx) => ( |
| | <div key={idx} className="border-l border-slate-100 ml-4"> |
| | {Object.entries(item).map(([k, v]) => renderSection(k, v, level + 2))} |
| | {idx < value.length - 1 && <div className="h-2" />} |
| | </div> |
| | )) |
| | ) : ( |
| | Object.entries(value).map(([k, v]) => renderSection(k, v, level + 1)) |
| | )} |
| | <div style={{ paddingLeft: level * 16 }} className="text-slate-400"> |
| | {isArray ? "]" : "}"} |
| | </div> |
| | </motion.div> |
| | )} |
| | </AnimatePresence> |
| | </div> |
| | ); |
| | }; |
| |
|
| | return ( |
| | <div className="h-full flex flex-col bg-white rounded-2xl border border-slate-200 overflow-hidden"> |
| | {/* Header */} |
| | <div className="flex items-center justify-between px-5 py-4 border-b border-slate-100"> |
| | <div className="flex items-center gap-3"> |
| | <div className="h-8 w-8 rounded-lg bg-emerald-50 flex items-center justify-center"> |
| | <Code2 className="h-4 w-4 text-emerald-600" /> |
| | </div> |
| | <div> |
| | <h3 className="font-semibold text-slate-800 text-sm">Extracted Data</h3> |
| | <p className="text-xs text-slate-400"> |
| | {isComplete |
| | ? `${fieldsExtracted} field${fieldsExtracted !== 1 ? 's' : ''} extracted` |
| | : "Waiting for extraction"} |
| | </p> |
| | </div> |
| | {isComplete && onNewUpload && ( |
| | <Button |
| | variant="ghost" |
| | size="sm" |
| | onClick={onNewUpload} |
| | className="h-8 ml-auto text-xs gap-1.5 text-indigo-600 hover:text-indigo-700 hover:bg-indigo-50" |
| | title="Upload new document" |
| | > |
| | <Upload className="h-3.5 w-3.5" /> |
| | New |
| | </Button> |
| | )} |
| | </div> |
| | |
| | {isComplete && ( |
| | <div className="flex items-center gap-2"> |
| | <Tabs value={activeTab} onValueChange={setActiveTab}> |
| | <TabsList className="h-8 bg-slate-100 p-0.5"> |
| | <TabsTrigger value="text" className="h-7 text-xs gap-1.5"> |
| | <FileText className="h-3 w-3" /> |
| | Text |
| | </TabsTrigger> |
| | <TabsTrigger value="json" className="h-7 text-xs gap-1.5"> |
| | <Braces className="h-3 w-3" /> |
| | JSON |
| | </TabsTrigger> |
| | <TabsTrigger value="xml" className="h-7 text-xs gap-1.5"> |
| | <FileCode2 className="h-3 w-3" /> |
| | XML |
| | </TabsTrigger> |
| | </TabsList> |
| | </Tabs> |
| | <Button |
| | variant="ghost" |
| | size="sm" |
| | onClick={handleCopy} |
| | className="h-8 text-xs gap-1.5" |
| | > |
| | {copied ? ( |
| | <> |
| | <Check className="h-3 w-3 text-emerald-500" /> |
| | Copied |
| | </> |
| | ) : ( |
| | <> |
| | <Copy className="h-3 w-3" /> |
| | Copy |
| | </> |
| | )} |
| | </Button> |
| | </div> |
| | )} |
| | </div> |
| |
|
| | {} |
| | <div className="flex-1 overflow-auto"> |
| | {!hasFile ? ( |
| | <div className="h-full flex items-center justify-center p-6"> |
| | <div className="text-center"> |
| | <div className="h-20 w-20 mx-auto rounded-2xl bg-slate-100 flex items-center justify-center mb-4"> |
| | <Code2 className="h-10 w-10 text-slate-300" /> |
| | </div> |
| | <p className="text-slate-400 text-sm">Extracted data will appear here</p> |
| | </div> |
| | </div> |
| | ) : isProcessing ? ( |
| | <div className="h-full flex items-center justify-center p-6"> |
| | <div className="text-center"> |
| | <motion.div |
| | animate={{ rotate: 360 }} |
| | transition={{ duration: 2, repeat: Infinity, ease: "linear" }} |
| | className="h-16 w-16 mx-auto rounded-2xl bg-gradient-to-br from-indigo-100 to-violet-100 flex items-center justify-center mb-4" |
| | > |
| | <Sparkles className="h-8 w-8 text-indigo-500" /> |
| | </motion.div> |
| | <p className="text-slate-700 font-medium mb-1">Extracting data...</p> |
| | <p className="text-slate-400 text-sm">{statusMessage}</p> |
| | |
| | <div className="mt-6 flex items-center justify-center gap-1"> |
| | {[0, 1, 2].map((i) => ( |
| | <motion.div |
| | key={i} |
| | animate={{ scale: [1, 1.2, 1] }} |
| | transition={{ |
| | duration: 0.6, |
| | repeat: Infinity, |
| | delay: i * 0.2, |
| | }} |
| | className="h-2 w-2 rounded-full bg-indigo-400" |
| | /> |
| | ))} |
| | </div> |
| | </div> |
| | </div> |
| | ) : isComplete && Object.keys(fields).length === 0 ? ( |
| | <div className="h-full flex items-center justify-center p-6"> |
| | <div className="text-center"> |
| | <div className="h-20 w-20 mx-auto rounded-2xl bg-amber-100 flex items-center justify-center mb-4"> |
| | <Code2 className="h-10 w-10 text-amber-600" /> |
| | </div> |
| | <p className="text-slate-600 font-medium mb-1">No data extracted</p> |
| | <p className="text-slate-400 text-sm">The document may not contain extractable fields</p> |
| | </div> |
| | </div> |
| | ) : ( |
| | <div className="p-4 font-mono text-sm"> |
| | {activeTab === "text" ? ( |
| | <div |
| | className="text-sm text-slate-700 leading-relaxed" |
| | style={{ |
| | fontFamily: 'system-ui, -apple-system, sans-serif' |
| | }} |
| | > |
| | <div |
| | className="markdown-content" |
| | dangerouslySetInnerHTML={{ __html: renderMarkdownToHTML(fieldsToText(fields)) }} |
| | style={{ |
| | lineHeight: '1.6' |
| | }} |
| | /> |
| | <style>{` |
| | .markdown-content h1 { |
| | font-size: 1.5rem; |
| | font-weight: 700; |
| | color: #0f172a; |
| | margin-top: 1.5rem; |
| | margin-bottom: 1rem; |
| | line-height: 1.3; |
| | } |
| | .markdown-content h2 { |
| | font-size: 1.25rem; |
| | font-weight: 600; |
| | color: #0f172a; |
| | margin-top: 1.25rem; |
| | margin-bottom: 0.75rem; |
| | line-height: 1.3; |
| | } |
| | .markdown-content h3 { |
| | font-size: 1.125rem; |
| | font-weight: 600; |
| | color: #1e293b; |
| | margin-top: 1rem; |
| | margin-bottom: 0.5rem; |
| | line-height: 1.3; |
| | } |
| | .markdown-content p { |
| | margin-top: 0.75rem; |
| | margin-bottom: 0.75rem; |
| | color: #334155; |
| | } |
| | .markdown-content table { |
| | width: 100%; |
| | border-collapse: collapse; |
| | margin: 1.5rem 0; |
| | font-size: 0.875rem; |
| | box-shadow: 0 1px 3px 0 rgba(0, 0, 0, 0.1); |
| | } |
| | .markdown-content table caption { |
| | font-weight: 600; |
| | margin-bottom: 0.5rem; |
| | text-align: left; |
| | } |
| | .markdown-content table th { |
| | background-color: #f8fafc; |
| | border: 1px solid #cbd5e1; |
| | padding: 0.75rem; |
| | text-align: left; |
| | font-weight: 600; |
| | color: #0f172a; |
| | } |
| | .markdown-content table td { |
| | border: 1px solid #cbd5e1; |
| | padding: 0.75rem; |
| | color: #334155; |
| | } |
| | .markdown-content table tr:nth-child(even) { |
| | background-color: #f8fafc; |
| | } |
| | .markdown-content table tr:hover { |
| | background-color: #f1f5f9; |
| | } |
| | .markdown-content strong { |
| | font-weight: 600; |
| | color: #0f172a; |
| | } |
| | .markdown-content em { |
| | font-style: italic; |
| | } |
| | .markdown-content a { |
| | color: #4f46e5; |
| | text-decoration: underline; |
| | } |
| | .markdown-content a:hover { |
| | color: #4338ca; |
| | } |
| | .markdown-content sup { |
| | font-size: 0.75em; |
| | vertical-align: super; |
| | line-height: 0; |
| | position: relative; |
| | top: -0.5em; |
| | } |
| | .markdown-content sub { |
| | font-size: 0.75em; |
| | vertical-align: sub; |
| | line-height: 0; |
| | position: relative; |
| | bottom: -0.25em; |
| | } |
| | .markdown-content ul, .markdown-content ol { |
| | margin: 0.75rem 0; |
| | padding-left: 1.5rem; |
| | } |
| | .markdown-content li { |
| | margin: 0.25rem 0; |
| | } |
| | `}</style> |
| | </div> |
| | ) : activeTab === "json" ? ( |
| | <div className="space-y-1"> |
| | <span className="text-slate-400">{"{"}</span> |
| | {Object.keys(preparedFields).length > 0 ? ( |
| | Object.entries(preparedFields).map(([key, value]) => |
| | renderSection(key, value, 1) |
| | ) |
| | ) : ( |
| | <div className="pl-4 text-slate-400 italic">No fields extracted</div> |
| | )} |
| | <span className="text-slate-400">{"}"}</span> |
| | </div> |
| | ) : ( |
| | <pre className="text-sm text-slate-600 whitespace-pre-wrap"> |
| | {objectToXML(fields).split("\n").map((line, i) => ( |
| | <div key={i} className="hover:bg-slate-50 px-2 -mx-2 rounded"> |
| | {line.includes("<") ? ( |
| | <> |
| | {line.split(/(<\/?[\w\s=".-]+>)/g).map((part, j) => { |
| | if (part.startsWith("</")) { |
| | return ( |
| | <span key={j} className="text-rose-500"> |
| | {part} |
| | </span> |
| | ); |
| | } |
| | if (part.startsWith("<")) { |
| | return ( |
| | <span key={j} className="text-indigo-500"> |
| | {part} |
| | </span> |
| | ); |
| | } |
| | return ( |
| | <span key={j} className="text-slate-700"> |
| | {part} |
| | </span> |
| | ); |
| | })} |
| | </> |
| | ) : ( |
| | line |
| | )} |
| | </div> |
| | ))} |
| | </pre> |
| | )} |
| | </div> |
| | )} |
| | </div> |
| |
|
| | {} |
| | {isComplete && extractionResult && ( |
| | <div className="px-5 py-3 border-t border-slate-100 bg-slate-50/50"> |
| | <div className="flex items-center justify-between text-xs"> |
| | <div className="flex items-center gap-4"> |
| | <div className="flex items-center gap-1.5"> |
| | <div className={cn( |
| | "h-2 w-2 rounded-full", |
| | confidence >= 90 ? "bg-emerald-500" : confidence >= 70 ? "bg-amber-500" : "bg-red-500" |
| | )} /> |
| | <span className="text-slate-500">Confidence:</span> |
| | <span className="font-semibold text-slate-700"> |
| | {confidence > 0 ? `${confidence.toFixed(1)}%` : "N/A"} |
| | </span> |
| | </div> |
| | <div className="flex items-center gap-1.5"> |
| | <span className="text-slate-500">Fields:</span> |
| | <span className="font-semibold text-slate-700">{fieldsExtracted}</span> |
| | </div> |
| | </div> |
| | <span className="text-slate-400"> |
| | Processed in {totalTime >= 1000 ? `${(totalTime / 1000).toFixed(1)}s` : `${totalTime}ms`} |
| | </span> |
| | </div> |
| | </div> |
| | )} |
| | </div> |
| | ); |
| | } |
| |
|