Seth0330 committed on
Commit
d0cfc3b
·
verified ·
1 Parent(s): a486933

Update frontend/src/components/ocr/ExtractionOutput.jsx

Browse files
frontend/src/components/ocr/ExtractionOutput.jsx CHANGED
@@ -119,21 +119,55 @@ function prepareFieldsForOutput(fields, format = "json") {
119
 
120
  const output = { ...fields };
121
 
122
- // Remove full_text if pages array exists (to avoid duplication)
123
  if (output.pages && Array.isArray(output.pages) && output.pages.length > 0) {
124
  delete output.full_text;
 
 
 
 
 
 
 
 
 
 
 
 
125
  }
126
 
127
- // For JSON: restructure pages into separate top-level fields (page_1, page_2, etc.)
128
- if (format === "json" && output.pages && Array.isArray(output.pages)) {
 
 
 
129
  output.pages.forEach((page, idx) => {
130
  const pageNum = page.page_number || idx + 1;
131
- output[`page_${pageNum}`] = {
 
 
 
 
 
 
 
 
 
 
 
 
 
132
  text: page.text || "",
133
- fields: page.fields || {},
134
  confidence: page.confidence || 0,
135
  doc_type: page.doc_type || "other"
136
  };
 
 
 
 
 
 
 
137
  });
138
  // Remove pages array - we now have page_1, page_2, etc. as separate fields
139
  delete output.pages;
@@ -488,7 +522,7 @@ export default function ExtractionOutput({ hasFile, isProcessing, isComplete, ex
488
  >
489
  <Sparkles className="h-8 w-8 text-indigo-500" />
490
  </motion.div>
491
- <p className="text-slate-700 font-medium mb-1">AI is extracting data...</p>
492
  <p className="text-slate-400 text-sm">Analyzing document structure</p>
493
 
494
  <div className="mt-6 flex items-center justify-center gap-1">
 
119
 
120
  const output = { ...fields };
121
 
122
+ // Remove full_text from top-level if pages array exists (to avoid duplication)
123
  if (output.pages && Array.isArray(output.pages) && output.pages.length > 0) {
124
  delete output.full_text;
125
+
126
+ // Clean up each page: remove full_text from page.fields (it duplicates page.text)
127
+ output.pages = output.pages.map(page => {
128
+ const cleanedPage = { ...page };
129
+ if (cleanedPage.fields && typeof cleanedPage.fields === "object") {
130
+ const cleanedFields = { ...cleanedPage.fields };
131
+ // Remove full_text from page fields (duplicates page.text)
132
+ delete cleanedFields.full_text;
133
+ cleanedPage.fields = cleanedFields;
134
+ }
135
+ return cleanedPage;
136
+ });
137
  }
138
 
139
+ // For JSON and XML: restructure pages into separate top-level fields (page_1, page_2, etc.)
140
+ if ((format === "json" || format === "xml") && output.pages && Array.isArray(output.pages)) {
141
+ // Get top-level field keys (these are merged from all pages - avoid duplicating in page fields)
142
+ const topLevelKeys = new Set(Object.keys(output).filter(k => k !== "pages" && k !== "full_text"));
143
+
144
  output.pages.forEach((page, idx) => {
145
  const pageNum = page.page_number || idx + 1;
146
+ const pageFields = page.fields || {};
147
+
148
+ // Remove duplicate fields from page.fields:
149
+ // 1. Remove full_text (duplicates page.text)
150
+ // 2. Remove fields that match top-level fields (already shown at root)
151
+ const cleanedPageFields = {};
152
+ for (const [key, value] of Object.entries(pageFields)) {
153
+ // Skip full_text and fields that match top-level exactly
154
+ if (key !== "full_text" && (!topLevelKeys.has(key) || (value !== output[key]))) {
155
+ cleanedPageFields[key] = value;
156
+ }
157
+ }
158
+
159
+ const pageObj = {
160
  text: page.text || "",
 
161
  confidence: page.confidence || 0,
162
  doc_type: page.doc_type || "other"
163
  };
164
+
165
+ // Only add fields if there are unique page-specific fields
166
+ if (Object.keys(cleanedPageFields).length > 0) {
167
+ pageObj.fields = cleanedPageFields;
168
+ }
169
+
170
+ output[`page_${pageNum}`] = pageObj;
171
  });
172
  // Remove pages array - we now have page_1, page_2, etc. as separate fields
173
  delete output.pages;
 
522
  >
523
  <Sparkles className="h-8 w-8 text-indigo-500" />
524
  </motion.div>
525
+ <p className="text-slate-700 font-medium mb-1">Extracting data...</p>
526
  <p className="text-slate-400 text-sm">Analyzing document structure</p>
527
 
528
  <div className="mt-6 flex items-center justify-center gap-1">