vn6295337 Claude Opus 4.5 committed on
Commit
2eda359
·
1 Parent(s): 2413602

Add parsing evaluation UI to frontend

Browse files

- Add evalParsing and getSupportedFormats API functions
- Create ParsingEval component to display Docling parsing results
- Add "Test Parsing" button to each staged file in Sidebar
- Expand supported file formats (DOCX, PPTX, XLSX, HTML, images)
- Increase max file size to 10MB

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>

frontend/src/api/client.js CHANGED
@@ -104,3 +104,24 @@ export async function getDropboxFile(filePath, accessToken) {
104
  });
105
  return res.json();
106
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
104
  });
105
  return res.json();
106
  }
107
+
108
/**
 * Evaluate Docling parsing on a Dropbox file.
 * Returns element breakdown and parsing metrics.
 *
 * Sends a JSON POST to `${API_BASE}/eval/parsing` with the file path under
 * `path` and the token under `access_token`, then resolves with the decoded
 * JSON body. The body is returned as-is regardless of HTTP status, so an
 * error payload sent by the server as JSON still reaches the caller.
 *
 * @param {string} filePath - Path of the file to evaluate (sent as `path`).
 * @param {string} accessToken - Access token (sent as `access_token`).
 * @returns {Promise<any>} The decoded JSON response body.
 * @throws {Error} If the response body is not valid JSON; the message
 *   includes the HTTP status and the original error is attached as `cause`.
 */
export async function evalParsing(filePath, accessToken) {
  const res = await fetch(`${API_BASE}/eval/parsing`, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({ path: filePath, access_token: accessToken }),
  });

  try {
    return await res.json();
  } catch (err) {
    // A non-JSON body (e.g. an HTML error page) would otherwise surface as a
    // bare SyntaxError with no hint about which request failed or why.
    throw new Error(
      `Parsing evaluation request failed (HTTP ${res.status}): response was not valid JSON`,
      { cause: err },
    );
  }
}
120
+
121
/**
 * Get supported document formats for Docling parsing.
 *
 * Issues a GET to `${API_BASE}/eval/formats` and resolves with the decoded
 * JSON body, whatever the HTTP status.
 *
 * @returns {Promise<any>} The decoded JSON response body.
 * @throws {Error} If the response body is not valid JSON; the message
 *   includes the HTTP status and the original error is attached as `cause`.
 */
export async function getSupportedFormats() {
  const res = await fetch(`${API_BASE}/eval/formats`);

  try {
    return await res.json();
  } catch (err) {
    // Report the HTTP status instead of an opaque JSON SyntaxError when the
    // server answers with a non-JSON body (e.g. an HTML error page).
    throw new Error(
      `Supported formats request failed (HTTP ${res.status}): response was not valid JSON`,
      { cause: err },
    );
  }
}
frontend/src/components/CloudConnect.jsx CHANGED
@@ -4,9 +4,9 @@ import { exchangeDropboxCode, getDropboxFolder } from '../api/client';
4
  const DROPBOX_APP_KEY = import.meta.env.VITE_DROPBOX_APP_KEY;
5
  const REDIRECT_URI = window.location.origin;
6
 
7
- // Supported file extensions
8
- const SUPPORTED_EXTENSIONS = ['.txt', '.md', '.pdf'];
9
- const MAX_FILE_SIZE_MB = 5;
10
  const MAX_FILE_SIZE_BYTES = MAX_FILE_SIZE_MB * 1024 * 1024;
11
 
12
  export default function CloudConnect({ onFilesStaged, stagedFiles = [], onAccessTokenChange }) {
@@ -269,7 +269,7 @@ export default function CloudConnect({ onFilesStaged, stagedFiles = [], onAccess
269
  Connect Dropbox
270
  </button>
271
  <p className="text-xs text-slate-500 text-center">
272
- Supports {SUPPORTED_EXTENSIONS.join(', ')} (max {MAX_FILE_SIZE_MB} MB)
273
  </p>
274
  </div>
275
  ) : (
@@ -306,7 +306,7 @@ export default function CloudConnect({ onFilesStaged, stagedFiles = [], onAccess
306
 
307
  {/* File type hints */}
308
  <p className="text-xs text-slate-500 text-center">
309
- Supports {SUPPORTED_EXTENSIONS.join(', ')} (max {MAX_FILE_SIZE_MB} MB)
310
  </p>
311
  </div>
312
  )}
 
4
  const DROPBOX_APP_KEY = import.meta.env.VITE_DROPBOX_APP_KEY;
5
  const REDIRECT_URI = window.location.origin;
6
 
7
+ // Supported file extensions (Docling supports many formats)
8
+ const SUPPORTED_EXTENSIONS = ['.txt', '.md', '.pdf', '.docx', '.pptx', '.xlsx', '.html', '.htm', '.jpg', '.jpeg', '.png', '.bmp', '.tiff'];
9
+ const MAX_FILE_SIZE_MB = 10;
10
  const MAX_FILE_SIZE_BYTES = MAX_FILE_SIZE_MB * 1024 * 1024;
11
 
12
  export default function CloudConnect({ onFilesStaged, stagedFiles = [], onAccessTokenChange }) {
 
269
  Connect Dropbox
270
  </button>
271
  <p className="text-xs text-slate-500 text-center">
272
+ PDF, DOCX, PPTX, XLSX, HTML, images (max {MAX_FILE_SIZE_MB} MB)
273
  </p>
274
  </div>
275
  ) : (
 
306
 
307
  {/* File type hints */}
308
  <p className="text-xs text-slate-500 text-center">
309
+ PDF, DOCX, PPTX, XLSX, HTML, images (max {MAX_FILE_SIZE_MB} MB)
310
  </p>
311
  </div>
312
  )}
frontend/src/components/ParsingEval.jsx ADDED
@@ -0,0 +1,215 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { useState } from 'react';
2
+ import { evalParsing } from '../api/client';
3
+
4
+ export default function ParsingEval({ file, accessToken, onClose }) {
5
+ const [loading, setLoading] = useState(false);
6
+ const [result, setResult] = useState(null);
7
+ const [error, setError] = useState(null);
8
+
9
+ const runEval = async () => {
10
+ setLoading(true);
11
+ setError(null);
12
+ setResult(null);
13
+
14
+ try {
15
+ const data = await evalParsing(file.path_lower, accessToken);
16
+ if (data.error) {
17
+ setError(data.error);
18
+ } else {
19
+ setResult(data);
20
+ }
21
+ } catch (err) {
22
+ setError(err.message);
23
+ }
24
+
25
+ setLoading(false);
26
+ };
27
+
28
+ // Format number with commas
29
+ const formatNumber = (num) => {
30
+ return num?.toLocaleString() || '0';
31
+ };
32
+
33
+ return (
34
+ <div className="fixed inset-0 bg-black/70 flex items-center justify-center z-50 p-4">
35
+ <div className="bg-slate-800 border border-slate-700 rounded-xl w-full max-w-2xl max-h-[85vh] flex flex-col shadow-xl">
36
+ {/* Header */}
37
+ <div className="p-4 border-b border-slate-700 flex items-center justify-between">
38
+ <div>
39
+ <h3 className="font-medium text-slate-100">Docling Parsing Evaluation</h3>
40
+ <p className="text-sm text-slate-400 mt-0.5 truncate max-w-md">{file.name}</p>
41
+ </div>
42
+ <button
43
+ type="button"
44
+ onClick={onClose}
45
+ className="p-1.5 hover:bg-slate-700 rounded-lg transition-colors"
46
+ aria-label="Close"
47
+ >
48
+ <svg className="w-5 h-5 text-slate-400" fill="none" stroke="currentColor" viewBox="0 0 24 24">
49
+ <path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M6 18L18 6M6 6l12 12" />
50
+ </svg>
51
+ </button>
52
+ </div>
53
+
54
+ {/* Content */}
55
+ <div className="flex-1 overflow-auto p-4">
56
+ {!result && !loading && !error && (
57
+ <div className="text-center py-8">
58
+ <svg className="w-16 h-16 text-slate-600 mx-auto mb-4" fill="none" stroke="currentColor" viewBox="0 0 24 24">
59
+ <path strokeLinecap="round" strokeLinejoin="round" strokeWidth={1.5} d="M9 5H7a2 2 0 00-2 2v12a2 2 0 002 2h10a2 2 0 002-2V7a2 2 0 00-2-2h-2M9 5a2 2 0 002 2h2a2 2 0 002-2M9 5a2 2 0 012-2h2a2 2 0 012 2m-6 9l2 2 4-4" />
60
+ </svg>
61
+ <p className="text-slate-300 mb-2">Test Docling's document parsing</p>
62
+ <p className="text-sm text-slate-500 mb-6">
63
+ This will download the file and analyze how Docling extracts structure
64
+ </p>
65
+ <button
66
+ type="button"
67
+ onClick={runEval}
68
+ className="px-6 py-2.5 bg-blue-600 text-white rounded-lg font-medium hover:bg-blue-700 active:scale-[0.98] transition-all"
69
+ >
70
+ Run Parsing Evaluation
71
+ </button>
72
+ </div>
73
+ )}
74
+
75
+ {loading && (
76
+ <div className="text-center py-12">
77
+ <div className="w-10 h-10 border-3 border-blue-400 border-t-transparent rounded-full animate-spin mx-auto mb-4"></div>
78
+ <p className="text-slate-300">Parsing document with Docling...</p>
79
+ <p className="text-sm text-slate-500 mt-2">This may take a moment for large files</p>
80
+ </div>
81
+ )}
82
+
83
+ {error && (
84
+ <div className="bg-red-900/30 border border-red-700 rounded-lg p-4">
85
+ <div className="flex items-start gap-3">
86
+ <svg className="w-5 h-5 text-red-400 mt-0.5" fill="none" stroke="currentColor" viewBox="0 0 24 24">
87
+ <path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M12 8v4m0 4h.01M21 12a9 9 0 11-18 0 9 9 0 0118 0z" />
88
+ </svg>
89
+ <div>
90
+ <p className="text-red-400 font-medium">Parsing failed</p>
91
+ <p className="text-sm text-red-300 mt-1">{error}</p>
92
+ </div>
93
+ </div>
94
+ <button
95
+ type="button"
96
+ onClick={runEval}
97
+ className="mt-4 px-4 py-2 bg-slate-700 text-slate-200 rounded-lg text-sm hover:bg-slate-600 transition-colors"
98
+ >
99
+ Try Again
100
+ </button>
101
+ </div>
102
+ )}
103
+
104
+ {result && (
105
+ <div className="space-y-6">
106
+ {/* Status Badge */}
107
+ <div className="flex items-center gap-2">
108
+ {result.status === 'OK' ? (
109
+ <span className="flex items-center gap-1.5 bg-green-900/40 border border-green-700 text-green-400 px-3 py-1 rounded-full text-sm font-medium">
110
+ <svg className="w-4 h-4" fill="none" stroke="currentColor" viewBox="0 0 24 24">
111
+ <path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M5 13l4 4L19 7" />
112
+ </svg>
113
+ Parsing Successful
114
+ </span>
115
+ ) : (
116
+ <span className="flex items-center gap-1.5 bg-red-900/40 border border-red-700 text-red-400 px-3 py-1 rounded-full text-sm font-medium">
117
+ <svg className="w-4 h-4" fill="none" stroke="currentColor" viewBox="0 0 24 24">
118
+ <path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M6 18L18 6M6 6l12 12" />
119
+ </svg>
120
+ {result.status}
121
+ </span>
122
+ )}
123
+ <span className="text-slate-500 text-sm">{result.format?.toUpperCase()}</span>
124
+ </div>
125
+
126
+ {/* Stats Grid */}
127
+ <div className="grid grid-cols-2 sm:grid-cols-4 gap-3">
128
+ <div className="bg-slate-900 border border-slate-700 rounded-lg p-3 text-center">
129
+ <p className="text-2xl font-bold text-blue-400">{formatNumber(result.total_elements)}</p>
130
+ <p className="text-xs text-slate-500 mt-1">Elements</p>
131
+ </div>
132
+ <div className="bg-slate-900 border border-slate-700 rounded-lg p-3 text-center">
133
+ <p className="text-2xl font-bold text-green-400">{formatNumber(result.total_chars)}</p>
134
+ <p className="text-xs text-slate-500 mt-1">Characters</p>
135
+ </div>
136
+ <div className="bg-slate-900 border border-slate-700 rounded-lg p-3 text-center">
137
+ <p className="text-2xl font-bold text-purple-400">{formatNumber(result.total_words)}</p>
138
+ <p className="text-xs text-slate-500 mt-1">Words</p>
139
+ </div>
140
+ <div className="bg-slate-900 border border-slate-700 rounded-lg p-3 text-center">
141
+ <p className="text-2xl font-bold text-orange-400">{result.page_count || '-'}</p>
142
+ <p className="text-xs text-slate-500 mt-1">Pages</p>
143
+ </div>
144
+ </div>
145
+
146
+ {/* Element Types */}
147
+ {result.element_types && Object.keys(result.element_types).length > 0 && (
148
+ <div>
149
+ <h4 className="text-sm font-medium text-slate-300 mb-3">Element Types</h4>
150
+ <div className="bg-slate-900 border border-slate-700 rounded-lg divide-y divide-slate-700">
151
+ {Object.entries(result.element_types)
152
+ .sort((a, b) => b[1] - a[1])
153
+ .map(([type, count]) => (
154
+ <div key={type} className="flex items-center justify-between px-4 py-2.5">
155
+ <span className="text-slate-300 capitalize">{type.replace('_', ' ')}</span>
156
+ <span className="text-slate-400 font-mono">{count}</span>
157
+ </div>
158
+ ))}
159
+ </div>
160
+ </div>
161
+ )}
162
+
163
+ {/* Sample Elements */}
164
+ {result.sample_elements && result.sample_elements.length > 0 && (
165
+ <div>
166
+ <h4 className="text-sm font-medium text-slate-300 mb-3">Sample Elements (first 10)</h4>
167
+ <div className="space-y-2">
168
+ {result.sample_elements.map((el, idx) => (
169
+ <div key={idx} className="bg-slate-900 border border-slate-700 rounded-lg p-3">
170
+ <div className="flex items-center gap-2 mb-2">
171
+ <span className={`text-xs font-medium px-2 py-0.5 rounded ${
172
+ el.type === 'heading' ? 'bg-blue-900/50 text-blue-300' :
173
+ el.type === 'table' ? 'bg-purple-900/50 text-purple-300' :
174
+ el.type === 'list_item' ? 'bg-orange-900/50 text-orange-300' :
175
+ 'bg-slate-700 text-slate-300'
176
+ }`}>
177
+ {el.type}
178
+ </span>
179
+ {el.level && (
180
+ <span className="text-xs text-slate-500">Level {el.level}</span>
181
+ )}
182
+ </div>
183
+ <p className="text-sm text-slate-400 break-words">{el.text || '(empty)'}</p>
184
+ </div>
185
+ ))}
186
+ </div>
187
+ </div>
188
+ )}
189
+ </div>
190
+ )}
191
+ </div>
192
+
193
+ {/* Footer */}
194
+ {result && (
195
+ <div className="p-4 border-t border-slate-700 flex justify-end gap-2">
196
+ <button
197
+ type="button"
198
+ onClick={runEval}
199
+ className="px-4 py-2 text-sm font-medium text-slate-300 hover:bg-slate-700 rounded-lg transition-colors"
200
+ >
201
+ Re-run
202
+ </button>
203
+ <button
204
+ type="button"
205
+ onClick={onClose}
206
+ className="px-4 py-2 text-sm font-medium bg-blue-600 text-white rounded-lg hover:bg-blue-700 transition-colors"
207
+ >
208
+ Done
209
+ </button>
210
+ </div>
211
+ )}
212
+ </div>
213
+ </div>
214
+ );
215
+ }
frontend/src/components/Sidebar.jsx CHANGED
@@ -5,6 +5,7 @@ import { chunkFiles } from '../api/chunker';
5
  import ProcessingStatus from './ProcessingStatus';
6
  import IndexSummary from './IndexSummary';
7
  import CloudConnect from './CloudConnect';
 
8
 
9
  export default function Sidebar({ onStatusChange, onAccessTokenChange }) {
10
  const [loading, setLoading] = useState(false);
@@ -16,6 +17,9 @@ export default function Sidebar({ onStatusChange, onAccessTokenChange }) {
16
  const [stagedFiles, setStagedFiles] = useState([]);
17
  const [accessToken, setAccessToken] = useState(null);
18
 
 
 
 
19
  // Handle files staged from CloudConnect (not processed yet)
20
  const handleFilesStaged = (files) => {
21
  setStagedFiles(files);
@@ -181,16 +185,29 @@ export default function Sidebar({ onStatusChange, onAccessTokenChange }) {
181
  <p className="text-xs text-slate-500">{formatSize(file.size)}</p>
182
  </div>
183
  </div>
184
- <button
185
- type="button"
186
- onClick={() => removeFile(file.id)}
187
- className="p-1 text-slate-500 hover:text-red-400 opacity-0 group-hover:opacity-100 transition-all"
188
- aria-label={`Remove ${file.name}`}
189
- >
190
- <svg className="w-4 h-4" fill="none" stroke="currentColor" viewBox="0 0 24 24">
191
- <path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M6 18L18 6M6 6l12 12" />
192
- </svg>
193
- </button>
 
 
 
 
 
 
 
 
 
 
 
 
 
194
  </div>
195
  ))}
196
  </div>
@@ -233,6 +250,14 @@ export default function Sidebar({ onStatusChange, onAccessTokenChange }) {
233
  </div>
234
  )}
235
 
 
 
 
 
 
 
 
 
236
  </div>
237
  );
238
  }
 
5
  import ProcessingStatus from './ProcessingStatus';
6
  import IndexSummary from './IndexSummary';
7
  import CloudConnect from './CloudConnect';
8
+ import ParsingEval from './ParsingEval';
9
 
10
  export default function Sidebar({ onStatusChange, onAccessTokenChange }) {
11
  const [loading, setLoading] = useState(false);
 
17
  const [stagedFiles, setStagedFiles] = useState([]);
18
  const [accessToken, setAccessToken] = useState(null);
19
 
20
+ // State for parsing evaluation
21
+ const [evalFile, setEvalFile] = useState(null);
22
+
23
  // Handle files staged from CloudConnect (not processed yet)
24
  const handleFilesStaged = (files) => {
25
  setStagedFiles(files);
 
185
  <p className="text-xs text-slate-500">{formatSize(file.size)}</p>
186
  </div>
187
  </div>
188
+ <div className="flex items-center gap-1 opacity-0 group-hover:opacity-100 transition-all">
189
+ <button
190
+ type="button"
191
+ onClick={() => setEvalFile(file)}
192
+ className="p-1 text-slate-500 hover:text-blue-400"
193
+ aria-label={`Test parsing ${file.name}`}
194
+ title="Test Docling Parsing"
195
+ >
196
+ <svg className="w-4 h-4" fill="none" stroke="currentColor" viewBox="0 0 24 24">
197
+ <path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M9 5H7a2 2 0 00-2 2v12a2 2 0 002 2h10a2 2 0 002-2V7a2 2 0 00-2-2h-2M9 5a2 2 0 002 2h2a2 2 0 002-2M9 5a2 2 0 012-2h2a2 2 0 012 2m-6 9l2 2 4-4" />
198
+ </svg>
199
+ </button>
200
+ <button
201
+ type="button"
202
+ onClick={() => removeFile(file.id)}
203
+ className="p-1 text-slate-500 hover:text-red-400"
204
+ aria-label={`Remove ${file.name}`}
205
+ >
206
+ <svg className="w-4 h-4" fill="none" stroke="currentColor" viewBox="0 0 24 24">
207
+ <path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M6 18L18 6M6 6l12 12" />
208
+ </svg>
209
+ </button>
210
+ </div>
211
  </div>
212
  ))}
213
  </div>
 
250
  </div>
251
  )}
252
 
253
+ {/* Parsing Evaluation Modal */}
254
+ {evalFile && (
255
+ <ParsingEval
256
+ file={evalFile}
257
+ accessToken={accessToken}
258
+ onClose={() => setEvalFile(null)}
259
+ />
260
+ )}
261
  </div>
262
  );
263
  }