Seth0330 committed on
Commit
a486933
·
verified ·
1 Parent(s): 28d24c1

Update frontend/src/components/ocr/ExtractionOutput.jsx

Browse files
frontend/src/components/ocr/ExtractionOutput.jsx CHANGED
@@ -111,20 +111,59 @@ Payment Terms: Net 30
111
  Thank you for your business!`;
112
 
113
  // Helper function to convert object to XML
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
114
  function objectToXML(obj, rootName = "extraction") {
 
 
 
115
  let xml = `<?xml version="1.0" encoding="UTF-8"?>\n<${rootName}>\n`;
116
 
117
  const convert = (obj, indent = " ") => {
118
  for (const [key, value] of Object.entries(obj)) {
119
  if (value === null || value === undefined) continue;
120
 
 
 
 
 
 
121
  if (Array.isArray(value)) {
122
  value.forEach((item) => {
123
  xml += `${indent}<${key}>\n`;
124
  if (typeof item === "object") {
125
  convert(item, indent + " ");
126
  } else {
127
- xml += `${indent} ${item}\n`;
128
  }
129
  xml += `${indent}</${key}>\n`;
130
  });
@@ -133,16 +172,25 @@ function objectToXML(obj, rootName = "extraction") {
133
  convert(value, indent + " ");
134
  xml += `${indent}</${key}>\n`;
135
  } else {
136
- xml += `${indent}<${key}>${value}</${key}>\n`;
137
  }
138
  }
139
  };
140
 
141
- convert(obj);
142
  xml += `</${rootName}>`;
143
  return xml;
144
  }
145
 
 
 
 
 
 
 
 
 
 
146
  // Helper function to format fields as readable text
147
  function fieldsToText(fields) {
148
  if (!fields || typeof fields !== "object") {
@@ -250,7 +298,8 @@ export default function ExtractionOutput({ hasFile, isProcessing, isComplete, ex
250
  const handleCopy = () => {
251
  let content = "";
252
  if (activeTab === "json") {
253
- content = JSON.stringify(fields, null, 2);
 
254
  } else if (activeTab === "xml") {
255
  content = objectToXML(fields);
256
  } else {
@@ -262,6 +311,11 @@ export default function ExtractionOutput({ hasFile, isProcessing, isComplete, ex
262
  setTimeout(() => setCopied(false), 2000);
263
  };
264
 
 
 
 
 
 
265
  // Update expanded sections when fields change
266
  React.useEffect(() => {
267
  if (extractionResult?.fields) {
@@ -472,8 +526,8 @@ export default function ExtractionOutput({ hasFile, isProcessing, isComplete, ex
472
  ) : activeTab === "json" ? (
473
  <div className="space-y-1">
474
  <span className="text-slate-400">{"{"}</span>
475
- {Object.keys(fields).length > 0 ? (
476
- Object.entries(fields).map(([key, value]) =>
477
  renderSection(key, value, 1)
478
  )
479
  ) : (
 
111
  Thank you for your business!`;
112
 
113
  // Helper function to convert object to XML
114
/**
 * Build a shallow copy of the extraction fields shaped for JSON/XML output.
 *
 * - When `pages` is a non-empty array, `full_text` is dropped so the page
 *   text is not emitted twice.
 * - When `format === "json"`, every entry of `pages` is lifted to a
 *   top-level `page_<n>` key holding `text`, `fields`, `confidence` and
 *   `doc_type`, and the `pages` array itself is removed.
 *
 * The input object is never mutated.
 *
 * @param {object} fields - Extraction fields. Falsy or non-object values are
 *   returned unchanged.
 * @param {string} [format="json"] - Target format; only "json" triggers the
 *   per-page restructuring.
 * @returns {object} The prepared copy, or `fields` itself when it is not an
 *   object.
 */
function prepareFieldsForOutput(fields, format = "json") {
  if (!fields || typeof fields !== "object") {
    return fields;
  }

  const output = { ...fields };
  const pages = Array.isArray(output.pages) ? output.pages : null;

  // Remove full_text if pages exist (the same text is carried per page).
  if (pages && pages.length > 0) {
    delete output.full_text;
  }

  // For JSON: restructure pages into separate top-level fields (page_1, page_2, ...).
  if (format === "json" && pages) {
    pages.forEach((page, idx) => {
      // A null/undefined entry would throw on property access; treat it as
      // an empty page so the remaining pages are still emitted.
      const entry = page && typeof page === "object" ? page : {};
      const pageNum = entry.page_number || idx + 1;

      // Duplicate or missing page numbers (or a pre-existing `page_<n>`
      // field) must not silently overwrite data: pick the next free key.
      let key = `page_${pageNum}`;
      for (let n = 2; Object.prototype.hasOwnProperty.call(output, key); n += 1) {
        key = `page_${pageNum}_${n}`;
      }

      output[key] = {
        text: entry.text || "",
        fields: entry.fields || {},
        confidence: entry.confidence || 0,
        doc_type: entry.doc_type || "other",
      };
    });
    // The pages array is now represented by the page_<n> keys.
    delete output.pages;
  }

  return output;
}
144
+
145
  function objectToXML(obj, rootName = "extraction") {
146
+ // Prepare fields - remove full_text if pages exist
147
+ const preparedObj = prepareFieldsForOutput(obj, "xml");
148
+
149
  let xml = `<?xml version="1.0" encoding="UTF-8"?>\n<${rootName}>\n`;
150
 
151
  const convert = (obj, indent = " ") => {
152
  for (const [key, value] of Object.entries(obj)) {
153
  if (value === null || value === undefined) continue;
154
 
155
+ // Skip full_text if pages exist (already handled in prepareFieldsForOutput)
156
+ if (key === "full_text" && obj.pages && Array.isArray(obj.pages) && obj.pages.length > 0) {
157
+ continue;
158
+ }
159
+
160
  if (Array.isArray(value)) {
161
  value.forEach((item) => {
162
  xml += `${indent}<${key}>\n`;
163
  if (typeof item === "object") {
164
  convert(item, indent + " ");
165
  } else {
166
+ xml += `${indent} ${escapeXML(String(item))}\n`;
167
  }
168
  xml += `${indent}</${key}>\n`;
169
  });
 
172
  convert(value, indent + " ");
173
  xml += `${indent}</${key}>\n`;
174
  } else {
175
+ xml += `${indent}<${key}>${escapeXML(String(value))}</${key}>\n`;
176
  }
177
  }
178
  };
179
 
180
+ convert(preparedObj);
181
  xml += `</${rootName}>`;
182
  return xml;
183
  }
184
 
185
/**
 * Escape a value so it can be embedded in XML text or attribute content.
 *
 * The five predefined XML entities (`&`, `<`, `>`, `"`, `'`) are replaced in
 * a single pass, so an `&` produced by one replacement is never re-escaped.
 * Characters that are not legal in an XML 1.0 document (C0 controls other
 * than tab/LF/CR, plus U+FFFE/U+FFFF) are removed, because they cannot be
 * represented even as character references and would make the output
 * malformed; extracted document text can contain them (e.g. form feeds).
 *
 * @param {*} str - Value to escape; non-strings are converted with `String()`.
 * @returns {string} The escaped text, or "" for `null`/`undefined`.
 */
function escapeXML(str) {
  if (str === null || str === undefined) {
    return "";
  }

  const entities = {
    "&": "&amp;",
    "<": "&lt;",
    ">": "&gt;",
    '"': "&quot;",
    "'": "&apos;",
  };

  return String(str)
    .replace(/[\u0000-\u0008\u000B\u000C\u000E-\u001F\uFFFE\uFFFF]/g, "")
    .replace(/[&<>"']/g, (ch) => entities[ch]);
}
193
+
194
  // Helper function to format fields as readable text
195
  function fieldsToText(fields) {
196
  if (!fields || typeof fields !== "object") {
 
298
  const handleCopy = () => {
299
  let content = "";
300
  if (activeTab === "json") {
301
+ const preparedFields = prepareFieldsForOutput(fields, "json");
302
+ content = JSON.stringify(preparedFields, null, 2);
303
  } else if (activeTab === "xml") {
304
  content = objectToXML(fields);
305
  } else {
 
311
  setTimeout(() => setCopied(false), 2000);
312
  };
313
 
314
+ // Get prepared fields for display
315
+ const preparedFields = React.useMemo(() => {
316
+ return prepareFieldsForOutput(fields, "json");
317
+ }, [fields]);
318
+
319
  // Update expanded sections when fields change
320
  React.useEffect(() => {
321
  if (extractionResult?.fields) {
 
526
  ) : activeTab === "json" ? (
527
  <div className="space-y-1">
528
  <span className="text-slate-400">{"{"}</span>
529
+ {Object.keys(preparedFields).length > 0 ? (
530
+ Object.entries(preparedFields).map(([key, value]) =>
531
  renderSection(key, value, 1)
532
  )
533
  ) : (