Seth0330 committed on
Commit
d8524fe
·
verified ·
1 Parent(s): 46697b4

Update frontend/src/components/ExportButtons.jsx

Browse files
frontend/src/components/ExportButtons.jsx CHANGED
@@ -6,9 +6,7 @@ import {
6
  FileCode2,
7
  Check,
8
  Share2,
9
- FileJson,
10
- Copy,
11
- Mail,
12
  Link2,
13
  } from "lucide-react";
14
  import { Button } from "@/components/ui/button";
@@ -139,7 +137,98 @@ export default function ExportButtons({ isComplete, extractionResult }) {
139
  const [downloading, setDownloading] = useState(null);
140
  const [copied, setCopied] = useState(false);
141
 
142
- const handleDownload = (format) => {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
143
  if (!extractionResult || !extractionResult.fields) {
144
  console.error("No extraction data available");
145
  return;
@@ -162,6 +251,122 @@ export default function ExportButtons({ isComplete, extractionResult }) {
162
  content = objectToXML(fields);
163
  filename = `extraction_${new Date().toISOString().split('T')[0]}.xml`;
164
  mimeType = "application/xml";
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
165
  }
166
 
167
  // Create blob and download
@@ -195,100 +400,27 @@ export default function ExportButtons({ isComplete, extractionResult }) {
195
  animate={{ opacity: 1, y: 0 }}
196
  className="flex items-center gap-3"
197
  >
198
- {/* JSON Download */}
199
- <Button
200
- onClick={() => handleDownload("json")}
201
- disabled={downloading === "json"}
202
- className={cn(
203
- "h-11 px-5 rounded-xl font-semibold transition-all duration-200",
204
- "bg-gradient-to-r from-indigo-600 to-violet-600 hover:from-indigo-700 hover:to-violet-700",
205
- "shadow-lg shadow-indigo-500/25 hover:shadow-xl hover:shadow-indigo-500/30",
206
- "text-white"
207
- )}
208
- >
209
- <AnimatePresence mode="wait">
210
- {downloading === "json" ? (
211
- <motion.div
212
- key="loading"
213
- initial={{ opacity: 0, scale: 0.8 }}
214
- animate={{ opacity: 1, scale: 1 }}
215
- exit={{ opacity: 0, scale: 0.8 }}
216
- className="flex items-center gap-2"
217
- >
218
- <motion.div
219
- animate={{ rotate: 360 }}
220
- transition={{ duration: 1, repeat: Infinity, ease: "linear" }}
221
- >
222
- <Download className="h-4 w-4" />
223
- </motion.div>
224
- Downloading...
225
- </motion.div>
226
- ) : (
227
- <motion.div
228
- key="default"
229
- initial={{ opacity: 0, scale: 0.8 }}
230
- animate={{ opacity: 1, scale: 1 }}
231
- exit={{ opacity: 0, scale: 0.8 }}
232
- className="flex items-center gap-2"
233
- >
234
- <Braces className="h-4 w-4" />
235
- Download JSON
236
- </motion.div>
237
- )}
238
- </AnimatePresence>
239
- </Button>
240
-
241
- {/* XML Download */}
242
- <Button
243
- onClick={() => handleDownload("xml")}
244
- disabled={downloading === "xml"}
245
- variant="outline"
246
- className={cn(
247
- "h-11 px-5 rounded-xl font-semibold transition-all duration-200",
248
- "border-2 border-slate-200 hover:border-slate-300",
249
- "hover:bg-slate-50"
250
- )}
251
- >
252
- <AnimatePresence mode="wait">
253
- {downloading === "xml" ? (
254
- <motion.div
255
- key="loading"
256
- initial={{ opacity: 0, scale: 0.8 }}
257
- animate={{ opacity: 1, scale: 1 }}
258
- exit={{ opacity: 0, scale: 0.8 }}
259
- className="flex items-center gap-2"
260
- >
261
  <motion.div
262
  animate={{ rotate: 360 }}
263
  transition={{ duration: 1, repeat: Infinity, ease: "linear" }}
264
  >
265
  <Download className="h-4 w-4" />
266
  </motion.div>
267
- Downloading...
268
- </motion.div>
269
- ) : (
270
- <motion.div
271
- key="default"
272
- initial={{ opacity: 0, scale: 0.8 }}
273
- animate={{ opacity: 1, scale: 1 }}
274
- exit={{ opacity: 0, scale: 0.8 }}
275
- className="flex items-center gap-2"
276
- >
277
- <FileCode2 className="h-4 w-4" />
278
- Download XML
279
- </motion.div>
280
- )}
281
- </AnimatePresence>
282
- </Button>
283
-
284
- {/* More Options Dropdown */}
285
- <DropdownMenu>
286
- <DropdownMenuTrigger asChild>
287
- <Button variant="ghost" className="h-11 w-11 rounded-xl">
288
- <Share2 className="h-4 w-4" />
289
  </Button>
290
  </DropdownMenuTrigger>
291
- <DropdownMenuContent align="end" className="w-48 rounded-xl p-2">
292
  <DropdownMenuItem
293
  className="rounded-lg cursor-pointer"
294
  onClick={handleCopyLink}
@@ -300,18 +432,60 @@ export default function ExportButtons({ isComplete, extractionResult }) {
300
  )}
301
  {copied ? "Link copied!" : "Copy share link"}
302
  </DropdownMenuItem>
303
- <DropdownMenuItem className="rounded-lg cursor-pointer">
304
- <Copy className="h-4 w-4 mr-2" />
305
- Copy to clipboard
306
- </DropdownMenuItem>
307
  <DropdownMenuSeparator />
308
- <DropdownMenuItem className="rounded-lg cursor-pointer">
309
- <Mail className="h-4 w-4 mr-2" />
310
- Send via email
 
 
 
 
 
 
 
 
 
 
 
 
 
 
311
  </DropdownMenuItem>
312
- <DropdownMenuItem className="rounded-lg cursor-pointer">
313
- <FileJson className="h-4 w-4 mr-2" />
314
- Export to Google Sheets
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
315
  </DropdownMenuItem>
316
  </DropdownMenuContent>
317
  </DropdownMenu>
 
6
  FileCode2,
7
  Check,
8
  Share2,
9
+ FileText,
 
 
10
  Link2,
11
  } from "lucide-react";
12
  import { Button } from "@/components/ui/button";
 
137
  const [downloading, setDownloading] = useState(null);
138
  const [copied, setCopied] = useState(false);
139
 
140
// Helper function to extract text from fields (adapted from the helper in ExtractionOutput)
/**
 * Pulls the displayable text out of an extraction `fields` object.
 *
 * Lookup order:
 *   1. Every key starting with "page_" whose value has a non-empty string
 *      `text` property; the page texts are joined with a blank line.
 *   2. `fields.full_text`, when it is a non-empty string.
 *
 * Pages are ordered by the number following "page_" so that "page_10" never
 * lands before "page_2" when the keys arrive in lexicographic order. Keys
 * without a numeric suffix keep their relative order and are placed last.
 *
 * @param {object} fields - Extraction result fields (may be null/undefined).
 * @returns {string} The combined text, or "" when nothing usable is present.
 */
const extractTextFromFields = (fields) => {
  if (!fields || typeof fields !== "object") {
    return "";
  }

  const PAGE_PREFIX = "page_";

  // Numeric page index used for ordering; non-numeric suffixes sort last.
  const pageOrder = (key) => {
    const parsed = Number.parseInt(key.slice(PAGE_PREFIX.length), 10);
    return Number.isNaN(parsed) ? Number.POSITIVE_INFINITY : parsed;
  };

  // Check for page_X structure first (preferred format)
  const pageTexts = Object.keys(fields)
    .filter((key) => key.startsWith(PAGE_PREFIX))
    .sort((a, b) => {
      const orderA = pageOrder(a);
      const orderB = pageOrder(b);
      if (orderA === orderB) {
        return 0; // stable sort keeps the original relative order
      }
      return orderA < orderB ? -1 : 1;
    })
    .map((key) => {
      const page = fields[key];
      // Only strings are accepted: callers run string methods on the result.
      return page && typeof page.text === "string" ? page.text : "";
    })
    .filter((text) => text);

  if (pageTexts.length > 0) {
    return pageTexts.join("\n\n");
  }

  // Fallback to full_text
  if (typeof fields.full_text === "string" && fields.full_text) {
    return fields.full_text;
  }

  return "";
};
170
+
171
// Helper function to render markdown to HTML (adapted from the helper in ExtractionOutput)
/**
 * Converts the lightweight markdown found in extracted text into an HTML
 * fragment.
 *
 * Handled syntax: `$^{x}$` / `$_{x}$` super/subscripts, `#`..`###` headings,
 * `**bold**`, `*italic*`, `[label](url)` links, and raw `<table>…</table>`
 * blocks, which are passed through untouched.
 *
 * Paragraph assembly works line by line so the output is always balanced:
 * blank lines separate paragraphs, consecutive text lines are joined with
 * `<br>`, and heading / block-level lines are emitted on their own instead of
 * being nested in (or half-closed by) `<p>` tags.
 *
 * NOTE(review): the input is inserted without HTML escaping and link targets
 * are not validated. That is required for the table pass-through, but it means
 * the text must come from a trusted source — confirm against the caller.
 *
 * @param {string} text - Markdown-like source text.
 * @returns {string} HTML fragment, or "" for empty input.
 */
const renderMarkdownToHTML = (text) => {
  if (!text) return "";

  let html = text;

  // Convert LaTeX-style superscripts/subscripts FIRST
  html = html.replace(/\$\s*\^\s*\{([^}]+)\}\s*\$/g, '<sup>$1</sup>');
  html = html.replace(/\$\s*\^\s*([^\s$<>]+)\s*\$/g, '<sup>$1</sup>');
  html = html.replace(/\$\s*_\s*\{([^}]+)\}\s*\$/g, '<sub>$1</sub>');
  html = html.replace(/\$\s*_\s*([^\s$<>]+)\s*\$/g, '<sub>$1</sub>');

  // Protect HTML table blocks: swap them for placeholders so the markdown
  // passes below cannot alter their contents.
  const htmlBlocks = [];
  html = html.replace(/<table[\s\S]*?<\/table>/gi, (match) => {
    htmlBlocks.push(match);
    return `__HTML_BLOCK_${htmlBlocks.length - 1}__`;
  });

  // Convert markdown headers
  html = html.replace(/^### (.*$)/gim, '<h3>$1</h3>');
  html = html.replace(/^## (.*$)/gim, '<h2>$1</h2>');
  html = html.replace(/^# (.*$)/gim, '<h1>$1</h1>');

  // Convert markdown bold/italic
  html = html.replace(/\*\*(.*?)\*\*/g, '<strong>$1</strong>');
  html = html.replace(/\*(.*?)\*/g, '<em>$1</em>');

  // Convert markdown links
  html = html.replace(/\[([^\]]+)\]\(([^)]+)\)/g, '<a href="$2">$1</a>');

  // Lines that already are block-level markup must not be wrapped in <p>.
  const BLOCK_LINE = /^<\/?(?:h[1-6]|p|div|ul|ol|li|pre|blockquote|hr)\b/i;

  // Turns one table-free stretch of text into balanced paragraph markup.
  const renderSegment = (segment) => {
    const pieces = [];
    let paragraphLines = [];

    const flushParagraph = () => {
      if (paragraphLines.length > 0) {
        pieces.push('<p>' + paragraphLines.join('<br>') + '</p>');
        paragraphLines = [];
      }
    };

    for (const rawLine of segment.split('\n')) {
      const line = rawLine.replace(/\r$/, ''); // tolerate CRLF input
      const trimmed = line.trim();

      if (trimmed === '') {
        flushParagraph(); // blank line ends the current paragraph
      } else if (BLOCK_LINE.test(trimmed)) {
        flushParagraph();
        pieces.push(trimmed);
      } else {
        paragraphLines.push(line);
      }
    }

    flushParagraph();
    return pieces.join('');
  };

  // Process line breaks, restoring protected tables in place.
  return html
    .split(/(__HTML_BLOCK_\d+__)/)
    .map((part) => {
      const placeholder = /^__HTML_BLOCK_(\d+)__$/.exec(part);
      if (placeholder) {
        return htmlBlocks[Number.parseInt(placeholder[1], 10)] || '';
      }
      return renderSegment(part);
    })
    .join('');
};
230
+
231
+ const handleDownload = async (format) => {
232
  if (!extractionResult || !extractionResult.fields) {
233
  console.error("No extraction data available");
234
  return;
 
251
  content = objectToXML(fields);
252
  filename = `extraction_${new Date().toISOString().split('T')[0]}.xml`;
253
  mimeType = "application/xml";
254
+ } else if (format === "docx") {
255
+ // For DOCX, create a Word-compatible HTML document that preserves layout
256
+ // Extract text and convert to HTML (same as text viewer)
257
+ const textContent = extractTextFromFields(fields);
258
+ const htmlContent = renderMarkdownToHTML(textContent);
259
+
260
+ // Create a Word-compatible HTML document with proper MIME type
261
+ // Word can open HTML files with .docx extension if we use the right MIME type
262
+ const wordHTML = `<!DOCTYPE html>
263
+ <html xmlns:o="urn:schemas-microsoft-com:office:office" xmlns:w="urn:schemas-microsoft-com:office:word" xmlns="http://www.w3.org/TR/REC-html40">
264
+ <head>
265
+ <meta charset="UTF-8">
266
+ <meta name="ProgId" content="Word.Document">
267
+ <meta name="Generator" content="Microsoft Word">
268
+ <meta name="Originator" content="Microsoft Word">
269
+ <!--[if gte mso 9]><xml>
270
+ <w:WordDocument>
271
+ <w:View>Print</w:View>
272
+ <w:Zoom>100</w:Zoom>
273
+ <w:DoNotOptimizeForBrowser/>
274
+ </w:WordDocument>
275
+ </xml><![endif]-->
276
+ <title>Document Extraction</title>
277
+ <style>
278
+ @page {
279
+ size: 8.5in 11in;
280
+ margin: 1in;
281
+ }
282
+ body {
283
+ font-family: 'Calibri', 'Arial', sans-serif;
284
+ font-size: 11pt;
285
+ line-height: 1.6;
286
+ margin: 0;
287
+ color: #333;
288
+ }
289
+ h1 {
290
+ font-size: 18pt;
291
+ font-weight: bold;
292
+ color: #0f172a;
293
+ margin-top: 24pt;
294
+ margin-bottom: 12pt;
295
+ page-break-after: avoid;
296
+ }
297
+ h2 {
298
+ font-size: 16pt;
299
+ font-weight: 600;
300
+ color: #0f172a;
301
+ margin-top: 20pt;
302
+ margin-bottom: 10pt;
303
+ page-break-after: avoid;
304
+ }
305
+ h3 {
306
+ font-size: 14pt;
307
+ font-weight: 600;
308
+ color: #1e293b;
309
+ margin-top: 16pt;
310
+ margin-bottom: 8pt;
311
+ page-break-after: avoid;
312
+ }
313
+ p {
314
+ margin-top: 6pt;
315
+ margin-bottom: 6pt;
316
+ }
317
+ table {
318
+ width: 100%;
319
+ border-collapse: collapse;
320
+ margin: 12pt 0;
321
+ font-size: 10pt;
322
+ page-break-inside: avoid;
323
+ }
324
+ table th {
325
+ background-color: #f8fafc;
326
+ border: 1pt solid #cbd5e1;
327
+ padding: 6pt;
328
+ text-align: left;
329
+ font-weight: 600;
330
+ color: #0f172a;
331
+ }
332
+ table td {
333
+ border: 1pt solid #cbd5e1;
334
+ padding: 6pt;
335
+ color: #334155;
336
+ }
337
+ table tr:nth-child(even) {
338
+ background-color: #f8fafc;
339
+ }
340
+ sup {
341
+ font-size: 0.75em;
342
+ vertical-align: super;
343
+ line-height: 0;
344
+ }
345
+ sub {
346
+ font-size: 0.75em;
347
+ vertical-align: sub;
348
+ line-height: 0;
349
+ }
350
+ strong {
351
+ font-weight: 600;
352
+ }
353
+ em {
354
+ font-style: italic;
355
+ }
356
+ a {
357
+ color: #4f46e5;
358
+ text-decoration: underline;
359
+ }
360
+ </style>
361
+ </head>
362
+ <body>
363
+ ${htmlContent}
364
+ </body>
365
+ </html>`;
366
+
367
+ content = wordHTML;
368
+ filename = `extraction_${new Date().toISOString().split('T')[0]}.doc`;
369
+ mimeType = "application/msword";
370
  }
371
 
372
  // Create blob and download
 
400
  animate={{ opacity: 1, y: 0 }}
401
  className="flex items-center gap-3"
402
  >
403
+ {/* Export Options Dropdown */}
404
+ <DropdownMenu>
405
+ <DropdownMenuTrigger asChild>
406
+ <Button
407
+ variant="ghost"
408
+ className="h-11 w-11 rounded-xl hover:bg-slate-100"
409
+ disabled={downloading !== null}
410
+ >
411
+ {downloading ? (
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
412
  <motion.div
413
  animate={{ rotate: 360 }}
414
  transition={{ duration: 1, repeat: Infinity, ease: "linear" }}
415
  >
416
  <Download className="h-4 w-4" />
417
  </motion.div>
418
+ ) : (
419
+ <Share2 className="h-4 w-4" />
420
+ )}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
421
  </Button>
422
  </DropdownMenuTrigger>
423
+ <DropdownMenuContent align="end" className="w-56 rounded-xl p-2">
424
  <DropdownMenuItem
425
  className="rounded-lg cursor-pointer"
426
  onClick={handleCopyLink}
 
432
  )}
433
  {copied ? "Link copied!" : "Copy share link"}
434
  </DropdownMenuItem>
 
 
 
 
435
  <DropdownMenuSeparator />
436
+ <DropdownMenuItem
437
+ className="rounded-lg cursor-pointer"
438
+ onClick={() => handleDownload("docx")}
439
+ disabled={downloading === "docx"}
440
+ >
441
+ {downloading === "docx" ? (
442
+ <motion.div
443
+ animate={{ rotate: 360 }}
444
+ transition={{ duration: 1, repeat: Infinity, ease: "linear" }}
445
+ className="h-4 w-4 mr-2"
446
+ >
447
+ <Download className="h-4 w-4" />
448
+ </motion.div>
449
+ ) : (
450
+ <FileText className="h-4 w-4 mr-2 text-blue-600" />
451
+ )}
452
+ Download Docx
453
  </DropdownMenuItem>
454
+ <DropdownMenuItem
455
+ className="rounded-lg cursor-pointer"
456
+ onClick={() => handleDownload("json")}
457
+ disabled={downloading === "json"}
458
+ >
459
+ {downloading === "json" ? (
460
+ <motion.div
461
+ animate={{ rotate: 360 }}
462
+ transition={{ duration: 1, repeat: Infinity, ease: "linear" }}
463
+ className="h-4 w-4 mr-2"
464
+ >
465
+ <Download className="h-4 w-4" />
466
+ </motion.div>
467
+ ) : (
468
+ <Braces className="h-4 w-4 mr-2 text-indigo-600" />
469
+ )}
470
+ Download JSON
471
+ </DropdownMenuItem>
472
+ <DropdownMenuItem
473
+ className="rounded-lg cursor-pointer"
474
+ onClick={() => handleDownload("xml")}
475
+ disabled={downloading === "xml"}
476
+ >
477
+ {downloading === "xml" ? (
478
+ <motion.div
479
+ animate={{ rotate: 360 }}
480
+ transition={{ duration: 1, repeat: Infinity, ease: "linear" }}
481
+ className="h-4 w-4 mr-2"
482
+ >
483
+ <Download className="h-4 w-4" />
484
+ </motion.div>
485
+ ) : (
486
+ <FileCode2 className="h-4 w-4 mr-2 text-slate-600" />
487
+ )}
488
+ Download XML
489
  </DropdownMenuItem>
490
  </DropdownMenuContent>
491
  </DropdownMenu>