doeqoth committed on
Commit
d081edf
·
verified ·
1 Parent(s): 17195f2

Manual changes saved

Browse files
Files changed (1) hide show
  1. index.html +934 -133
index.html CHANGED
@@ -1,149 +1,950 @@
1
/**
 * <app-header> custom element: a fixed top bar with the brand link and a
 * "start over" button.
 *
 * Clicking the reset button asks for confirmation and then dispatches a
 * `reset-app` CustomEvent on `document`.
 */
class AppHeader extends HTMLElement {
  constructor() {
    super();
  }

  connectedCallback() {
    // connectedCallback runs every time the element is (re)inserted into the
    // document. attachShadow() throws if a shadow root already exists, so the
    // shadow tree and its listener are built only on the first connection.
    if (this.shadowRoot) return;

    this.attachShadow({ mode: 'open' });
    this.shadowRoot.innerHTML = `
      <style>
        :host {
          display: block;
          position: fixed;
          top: 0;
          left: 0;
          right: 0;
          z-index: 1000;
        }

        header {
          height: 60px;
          background: rgba(255, 255, 255, 0.9);
          backdrop-filter: blur(12px);
          -webkit-backdrop-filter: blur(12px);
          border-bottom: 1px solid rgba(226, 232, 240, 0.6);
          box-shadow: 0 1px 3px 0 rgb(0 0 0 / 0.1);
        }

        .container {
          max-width: 1200px;
          margin: 0 auto;
          padding: 0 20px;
          height: 100%;
          display: flex;
          align-items: center;
          justify-content: space-between;
        }

        .brand {
          display: flex;
          align-items: center;
          gap: 12px;
          cursor: pointer;
          text-decoration: none;
        }

        .logo-icon {
          width: 36px;
          height: 36px;
          background: linear-gradient(135deg, #2563eb, #3b82f6);
          border-radius: 10px;
          display: flex;
          align-items: center;
          justify-content: center;
          color: white;
          box-shadow: 0 2px 8px rgba(37, 99, 235, 0.3);
        }

        .brand-text {
          font-size: 1.25rem;
          font-weight: 700;
          color: #1e40af;
          letter-spacing: -0.02em;
          font-family: 'Sarabun', sans-serif;
        }

        .badge {
          font-size: 0.65rem;
          background: #dbeafe;
          color: #1e40af;
          padding: 2px 8px;
          border-radius: 12px;
          font-weight: 600;
          margin-left: 8px;
          border: 1px solid #bfdbfe;
        }

        .reset-btn {
          display: flex;
          align-items: center;
          gap: 8px;
          padding: 8px 16px;
          background: white;
          border: 1px solid #e2e8f0;
          border-radius: 8px;
          color: #64748b;
          font-size: 0.875rem;
          font-weight: 500;
          cursor: pointer;
          transition: all 0.2s;
          font-family: 'Sarabun', sans-serif;
        }

        .reset-btn:hover {
          background: #f8fafc;
          border-color: #cbd5e1;
          color: #dc2626;
          transform: translateY(-1px);
          box-shadow: 0 2px 4px rgba(0,0,0,0.05);
        }

        .reset-btn:active {
          transform: translateY(0);
        }

        @media (max-width: 640px) {
          .brand-text {
            font-size: 1.1rem;
          }
          .badge {
            display: none;
          }
          .reset-btn span {
            display: none;
          }
          .reset-btn {
            padding: 8px;
          }
        }
      </style>

      <header>
        <div class="container">
          <a href="#" class="brand" onclick="event.preventDefault(); window.scrollTo({top: 0, behavior: 'smooth'});">
            <div class="logo-icon">
              <svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><path d="M14 2H6a2 2 0 0 0-2 2v16a2 2 0 0 0 2 2h12a2 2 0 0 0 2-2V8z"></path><polyline points="14 2 14 8 20 8"></polyline><line x1="16" y1="13" x2="8" y2="13"></line><line x1="16" y1="17" x2="8" y2="17"></line><polyline points="10 9 9 9 8 9"></polyline></svg>
            </div>
            <div style="display: flex; align-items: baseline;">
              <span class="brand-text">DocuAnalyza</span>
              <span class="badge">TH AI</span>
            </div>
          </a>

          <button class="reset-btn" id="reset-btn" title="เริ่มใหม่">
            <svg xmlns="http://www.w3.org/2000/svg" width="18" height="18" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><path d="M3 12a9 9 0 1 0 9-9 9.75 9.75 0 0 0-6.74 2.74L3 8"></path><path d="M3 3v5h5"></path></svg>
            <span>เริ่มใหม่</span>
          </button>
        </div>
      </header>
    `;

    this.shadowRoot.getElementById('reset-btn').addEventListener('click', () => {
      // The original prompt contained a Cyrillic "м" in place of the Thai
      // letter; the word now matches the button label above.
      if (confirm('ต้องการเริ่มใหม่? ข้อมูลที่ไม่ได้บันทึกจะหายไป')) {
        document.dispatchEvent(new CustomEvent('reset-app'));
      }
    });
  }
}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
148
 
149
- customElements.define('app-header', AppHeader);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
/**
 * Global State Management.
 *
 * Single mutable record shared by the handlers in this script.
 */
const state = {
  // File most recently accepted by handleFileSelected.
  currentFile: null,
  // 'pdf' or 'image', chosen from the file's MIME type.
  fileType: null,
  // Document object produced by pdfjsLib.getDocument(...).promise.
  pdfDoc: null,
  // 1-based page shown in the preview.
  currentPage: 1,
  totalPages: 0,
  // Extracted rows as arrays of cell strings.
  extractedData: [],
  // Extracted rows as { page, cells: [{ text, coordinates }] }.
  extractedDataWithCoords: [],
  // Re-entrancy guard for analysis and OCR.
  isProcessing: false,
  // Live Tesseract worker while OCR runs.
  ocrWorker: null,
  // Replaced whenever a new file is selected.
  abortController: null,
};

// Initialize PDF.js Worker
pdfjsLib.GlobalWorkerOptions.workerSrc =
  'https://cdnjs.cloudflare.com/ajax/libs/pdf.js/3.11.174/pdf.worker.min.js';

// DOM Elements (filled in by initializeElements)
let elements = {};
20
+
21
/**
 * Looks up every DOM node the script works with and stores the results in the
 * shared `elements` object, keyed by a camelCase name. Missing nodes are
 * stored as null, exactly as document.getElementById returns them.
 */
function initializeElements() {
  const idsByKey = {
    uploadSection: 'upload-section',
    previewSection: 'preview-section',
    resultsSection: 'results-section',
    previewContainer: 'preview-container',
    pageSelector: 'page-selector',
    fileInfo: 'file-info',
    resultsGrid: 'results-grid',
    resultsStats: 'results-stats',
    processingModal: 'processing-modal',
    processingTitle: 'processing-title',
    processingStatus: 'processing-status',
    progressBar: 'progress-bar',
    progressPercent: 'progress-percent',
    jsonContent: 'json-content',
    jsonPreview: 'json-preview',
    jsonChevron: 'json-chevron',
    toggleJson: 'toggle-json',
    copyAllBtn: 'copy-all-btn',
    toast: 'toast',
    toastMessage: 'toast-message',
  };

  const resolved = {};
  for (const [key, id] of Object.entries(idsByKey)) {
    resolved[key] = document.getElementById(id);
  }
  elements = resolved;
}
45
// Event Listeners Setup
// Bootstraps the page once the DOM is parsed: caches element references,
// wires the event handlers and installs the global error reporters.
document.addEventListener('DOMContentLoaded', function onDomReady() {
  initializeElements();
  setupEventListeners();
  setupGlobalErrorHandling();

  // Listen for invalid-file event from upload-zone
  const reportInvalidFile = (event) => {
    showToast(event.detail.message, 'error');
  };
  document.addEventListener('invalid-file', reportInvalidFile);
});
56
/**
 * Wires the document-level custom events emitted by the UI components, plus
 * the direct DOM listeners for the page selector, the JSON toggle and the
 * "copy all" button. DOM listeners are only attached when the element exists.
 */
function setupEventListeners() {
  // Listen for custom events from components
  document.addEventListener('file-selected', handleFileSelected);
  document.addEventListener('reset-app', resetApplication);
  document.addEventListener('analyze-pdf', analyzePDFStructure);
  document.addEventListener('ocr-thai', performOCR);
  document.addEventListener('export-json', () => exportData('json'));
  document.addEventListener('export-csv', () => exportData('csv'));
  document.addEventListener('export-excel', () => exportData('excel'));
  document.addEventListener('export-html', () => exportData('html'));
  document.addEventListener('copy-all', copyAllData);

  // Page selector change
  if (elements.pageSelector) {
    elements.pageSelector.addEventListener('change', (e) => {
      // Explicit radix, and ignore values that are not a page number so NaN
      // never reaches state.currentPage or the renderer.
      const pageNum = Number.parseInt(e.target.value, 10);
      if (!Number.isInteger(pageNum)) return;

      state.currentPage = pageNum;
      renderPDFPage(pageNum);
    });
  }

  // JSON Toggle
  if (elements.toggleJson) {
    elements.toggleJson.addEventListener('click', toggleJsonPreview);
  }

  // Copy All Button
  if (elements.copyAllBtn) {
    elements.copyAllBtn.addEventListener('click', copyAllData);
  }
}
86
/**
 * Installs window-level handlers that log uncaught errors and unhandled
 * promise rejections to the console and surface them as error toasts.
 */
function setupGlobalErrorHandling() {
  const reportError = (event) => {
    console.error('Global error:', event.error);
    const detail = event.error?.message || 'Unknown error';
    showToast('เกิดข้อผิดพลาด: ' + detail, 'error');
  };

  const reportRejection = (event) => {
    console.error('Unhandled promise rejection:', event.reason);
    showToast('เกิดข้อผิดพลาดในการประมวลผล', 'error');
  };

  window.addEventListener('error', reportError);
  window.addEventListener('unhandledrejection', reportRejection);
}
97
// File Handling Functions
/**
 * Handles the `file-selected` event: validates the file, cancels any work
 * tied to the previous file, resets per-file state and starts loading the
 * preview.
 *
 * @param {CustomEvent} e - event whose `detail.file` is the chosen File.
 */
function handleFileSelected(e) {
  const file = e?.detail?.file;
  if (!file) return;

  // Validate file type
  const validTypes = ['application/pdf', 'image/jpeg', 'image/jpg', 'image/png'];
  if (!validTypes.includes(file.type)) {
    showToast('ไฟล์ไม่รองรับ กรุณาอัปโหลด PDF, JPG หรือ PNG', 'error');
    return;
  }

  // Check file size (limit to 50MB)
  const maxSize = 50 * 1024 * 1024; // 50MB
  if (file.size > maxSize) {
    showToast('ไฟล์ใหญ่เกินไป กรุณาอัปโหลดไฟล์ขนาดไม่เกิน 50MB', 'error');
    return;
  }

  // Clean up previous state
  if (state.ocrWorker) {
    state.ocrWorker.terminate();
    state.ocrWorker = null;
  }

  if (state.abortController) {
    state.abortController.abort();
  }

  state.abortController = new AbortController();
  state.currentFile = file;
  state.extractedData = [];
  // Coordinate rows take precedence when results are rendered or exported, so
  // they must be cleared too or the previous file's rows would be shown.
  state.extractedDataWithCoords = [];
  // Drop the previous PDF so an `analyze-pdf` event cannot analyse a document
  // that no longer matches the preview (for example after loading an image).
  state.pdfDoc = null;
  state.totalPages = 0;
  state.currentPage = 1;

  if (file.type === 'application/pdf') {
    state.fileType = 'pdf';
    loadPDF(file);
  } else {
    state.fileType = 'image';
    loadImage(file);
  }

  updateUIForFileLoaded();
}
140
/**
 * Switches the page from the upload view to the preview view, hides any old
 * results, and shows the current file's name and size (in MB, two decimals).
 */
function updateUIForFileLoaded() {
  const HIDDEN = 'hidden';
  elements.uploadSection.classList.add(HIDDEN);
  elements.previewSection.classList.remove(HIDDEN);
  elements.resultsSection.classList.add(HIDDEN);

  const file = state.currentFile;
  const megabytes = file.size / 1024 / 1024;
  const sizeMB = megabytes.toFixed(2);
  elements.fileInfo.textContent = `${file.name} (${sizeMB} MB)`;
}
148
/**
 * Reads a PDF file, opens it with PDF.js, stores the document in `state`,
 * fills the page selector and renders page 1.
 *
 * The abort signal that is current when loading starts is captured up front.
 * handleFileSelected aborts that controller when another file is chosen, so a
 * superseded load stops instead of overwriting the newer file's state.
 *
 * @param {File} file - the PDF file to load.
 * @returns {Promise<void>}
 */
async function loadPDF(file) {
  const signal = state.abortController?.signal;

  try {
    const arrayBuffer = await file.arrayBuffer();
    if (signal?.aborted) return;

    const pdf = await pdfjsLib.getDocument({
      data: arrayBuffer,
      disableAutoFetch: true,
      disableStream: true
    }).promise;

    if (signal?.aborted) {
      // Nobody will use this document; release it when the API allows.
      Promise.resolve(pdf.destroy?.()).catch((err) => {
        console.warn('PDF cleanup failed:', err);
      });
      return;
    }

    state.pdfDoc = pdf;
    state.totalPages = pdf.numPages;
    state.currentPage = 1;

    // Setup page selector
    setupPageSelector();

    // Render first page
    await renderPDFPage(1);

    showToast('โหลด PDF สำเร็จ');
  } catch (error) {
    console.error('PDF Load Error:', error);
    if (error.name !== 'AbortError') {
      showToast('ไม่สามารถโหลด PDF ได้', 'error');
    }
  }
}
175
/**
 * Rebuilds the page selector with one option per page of the loaded PDF,
 * makes it visible and selects page 1. Does nothing when the selector element
 * is absent.
 */
function setupPageSelector() {
  const selector = elements.pageSelector;
  if (!selector) return;

  selector.innerHTML = '';
  selector.classList.remove('hidden');

  let pageNumber = 1;
  while (pageNumber <= state.totalPages) {
    const entry = document.createElement('option');
    entry.value = pageNumber;
    entry.textContent = `หน้า ${pageNumber} จาก ${state.totalPages}`;
    selector.appendChild(entry);
    pageNumber += 1;
  }

  selector.value = 1;
}
190
/**
 * Renders one page of the loaded PDF onto a fresh <canvas id="pdf-canvas">
 * and puts it in the preview container, showing a spinner while it works.
 *
 * @param {number} pageNum - 1-based page number to render.
 * @returns {Promise<void>}
 */
async function renderPDFPage(pageNum) {
  // Nothing to draw before a PDF is loaded (or after it has been cleared).
  const pdfDoc = state.pdfDoc;
  if (!pdfDoc) return;

  // Rendering is asynchronous, so quick page changes can finish out of order.
  // Each call takes a ticket; only the most recent one may touch the preview.
  const requestId = (renderPDFPage.latestRequestId ?? 0) + 1;
  renderPDFPage.latestRequestId = requestId;

  let page = null;
  try {
    if (elements.previewContainer) {
      elements.previewContainer.innerHTML = '<div class="flex items-center justify-center h-64"><i data-feather="loader" class="animate-spin w-8 h-8 text-blue-600"></i></div>';
      feather.replace();
    }

    page = await pdfDoc.getPage(pageNum);
    const scale = 1.5;
    const viewport = page.getViewport({ scale });

    const canvas = document.createElement('canvas');
    canvas.id = 'pdf-canvas';
    const context = canvas.getContext('2d');
    canvas.height = viewport.height;
    canvas.width = viewport.width;

    await page.render({
      canvasContext: context,
      viewport: viewport
    }).promise;

    // A newer request owns the preview now; drop this result.
    if (requestId !== renderPDFPage.latestRequestId) return;

    if (elements.previewContainer) {
      elements.previewContainer.innerHTML = '';
      elements.previewContainer.appendChild(canvas);
    }
  } catch (error) {
    console.error('Render Error:', error);
    if (error.name !== 'AbortError') {
      showToast('ไม่สามารถแสดงหน้าได้', 'error');
    }
  } finally {
    // Release page resources on the error path as well.
    page?.cleanup();
  }
}
225
/**
 * Shows an image file in the preview container as <img id="preview-image">.
 * The page selector is hidden because images have a single page.
 *
 * @param {File} file - the image file to preview.
 */
function loadImage(file) {
  if (elements.pageSelector) {
    elements.pageSelector.classList.add('hidden');
  }

  // handleFileSelected aborts this controller when another file is chosen;
  // the callbacks below use it to ignore a read that has been superseded.
  const signal = state.abortController?.signal;

  const reader = new FileReader();

  reader.onload = (e) => {
    if (signal?.aborted) return;

    const img = document.createElement('img');
    img.id = 'preview-image';
    img.src = e.target.result;
    img.className = 'max-w-full h-auto rounded-lg shadow-lg';
    img.loading = 'lazy';

    if (elements.previewContainer) {
      elements.previewContainer.innerHTML = '';
      elements.previewContainer.appendChild(img);
    }
  };

  reader.onerror = () => {
    if (signal?.aborted) return;
    showToast('ไม่สามารถโหลดภาพได้', 'error');
  };

  reader.readAsDataURL(file);
}
251
// PDF Structure Analysis
/**
 * Extracts the text layer of every page of the loaded PDF and arranges it
 * into rows and columns.
 *
 * Per page: text items are grouped into rows by similar Y position, each row
 * is ordered left to right, column boundaries are inferred from wide
 * horizontal gaps, and each item is placed in its column. Two results are
 * stored in `state`:
 *   - extractedData: one array of non-empty cell strings per row;
 *   - extractedDataWithCoords: { page, cells: [{ text, coordinates }] } per row.
 *
 * The document, page count and abort signal are captured when the run starts.
 * handleFileSelected replaces state.abortController on every new file, so
 * reading it inside the loop would consult the new, un-aborted controller and
 * the old run would never notice it had been cancelled.
 *
 * @returns {Promise<void>}
 */
async function analyzePDFStructure() {
  if (!state.pdfDoc || state.isProcessing) return;

  const pdfDoc = state.pdfDoc;
  const totalPages = state.totalPages;
  const signal = state.abortController?.signal;
  const isAborted = () => signal?.aborted === true;

  state.isProcessing = true;
  showProcessingModal('กำลังวิเคราะห์โครงสร้าง PDF', 'กำลังอ่านข้อมูลจากเอกสาร...', 0);

  try {
    const allData = [];
    const allDataWithCoords = [];
    const Y_TOLERANCE = 10; // max Y distance for two items to share a row
    const X_GAP_THRESHOLD = 50; // min horizontal gap treated as a column break

    for (let pageNum = 1; pageNum <= totalPages; pageNum++) {
      // Check for abort signal
      if (isAborted()) break;

      updateProcessingProgress(`กำลังวิเคราะห์หน้า ${pageNum}/${totalPages}...`, ((pageNum - 1) / totalPages) * 100);

      const page = await pdfDoc.getPage(pageNum);
      try {
        const viewport = page.getViewport({ scale: 1.0 });
        const textContent = await page.getTextContent();
        const items = textContent.items;

        if (items.length === 0) continue;

        // Sort by Y position (descending - top to bottom)
        items.sort((a, b) => b.transform[5] - a.transform[5]);

        // Group by rows using Y-tolerance; each row tracks the mean Y of its items.
        const rows = [];
        for (const item of items) {
          const y = item.transform[5];
          const row = rows.find((r) => Math.abs(r.y - y) < Y_TOLERANCE);

          if (row) {
            row.items.push(item);
            row.sumY += y;
            row.y = row.sumY / row.items.length;
          } else {
            rows.push({ y, sumY: y, items: [item] });
          }
        }

        // Sort rows by Y (top to bottom)
        rows.sort((a, b) => b.y - a.y);

        // Sort items in each row by X (left to right)
        for (const row of rows) {
          row.items.sort((a, b) => a.transform[4] - b.transform[4]);
        }

        // Detect column boundaries
        const columnBoundaries = detectColumnBoundaries(rows, X_GAP_THRESHOLD);
        const columnCount = columnBoundaries.length + 1;

        for (const row of rows) {
          const textCells = new Array(columnCount).fill('');
          const coordCells = new Array(columnCount).fill(null);

          for (const item of row.items) {
            const colIndex = findColumnIndex(item.transform[4], columnBoundaries);
            if (colIndex === -1) continue;

            const text = item.str.trim();
            if (!text) continue;

            textCells[colIndex] += (textCells[colIndex] ? ' ' : '') + text;

            const existing = coordCells[colIndex];
            if (!existing) {
              coordCells[colIndex] = {
                text: text,
                coordinates: {
                  x: Math.round(item.transform[4]),
                  // Flip Y so it is measured from the top of the page.
                  y: Math.round(viewport.height - item.transform[5]),
                  width: Math.round(item.width),
                  height: Math.round(item.height)
                }
              };
            } else {
              // Append text and widen the box to reach this item's right edge.
              existing.text += ' ' + text;
              existing.coordinates.width = Math.round(
                item.transform[4] + item.width - existing.coordinates.x
              );
            }
          }

          const textData = textCells.filter((cell) => cell.trim() !== '');
          const coordData = coordCells.filter((cell) => cell && cell.text.trim() !== '');

          if (textData.length > 0) {
            allData.push(textData);
          }
          if (coordData.length > 0) {
            allDataWithCoords.push({
              page: pageNum,
              cells: coordData
            });
          }
        }
      } finally {
        // Release page resources even when extraction throws.
        page.cleanup();
      }

      // Yield to UI thread every page
      await new Promise((resolve) => setTimeout(resolve, 0));
    }

    if (!isAborted()) {
      state.extractedData = allData;
      state.extractedDataWithCoords = allDataWithCoords;
      renderResults();
      showToast(`วิเคราะห์สำเร็จ พบ ${allData.length} แถวข้อมูล`);
    }
  } catch (error) {
    console.error('Analysis Error:', error);
    if (error.name !== 'AbortError') {
      showToast('เกิดข้อผิดพลาดในการวิเคราะห์: ' + error.message, 'error');
    }
  } finally {
    state.isProcessing = false;
    hideProcessingModal();
  }
}
389
/**
 * Infers column boundaries from the horizontal gaps between neighbouring text
 * items in each row.
 *
 * A gap wider than `threshold` between one item's right edge (x + width) and
 * the next item's left edge contributes a candidate boundary at the midpoint
 * of the gap. Candidates closer than `clusterTolerance` to an already
 * accepted boundary are dropped, so the first candidate seen represents its
 * cluster.
 *
 * @param {Array<{items: Array<{transform: number[], width?: number}>}>} rows
 *   rows whose `items` are already sorted left to right; x is `transform[4]`.
 * @param {number} threshold - minimum gap that counts as a column break.
 * @param {number} [clusterTolerance=20] - distance under which two candidate
 *   boundaries are treated as the same one.
 * @returns {number[]} boundary x positions in ascending order.
 */
function detectColumnBoundaries(rows, threshold, clusterTolerance = 20) {
  const candidates = [];

  for (const row of rows) {
    for (let i = 1; i < row.items.length; i++) {
      const previous = row.items[i - 1];
      const prevRight = previous.transform[4] + (previous.width || 0);
      const currentLeft = row.items[i].transform[4];

      if (currentLeft - prevRight > threshold) {
        candidates.push((prevRight + currentLeft) / 2);
      }
    }
  }

  // `some` rather than `find`: a boundary at exactly 0 is falsy, so testing
  // the value returned by `find` would let duplicates of 0 through.
  const clustered = [];
  for (const candidate of candidates) {
    const alreadyCovered = clustered.some((c) => Math.abs(c - candidate) < clusterTolerance);
    if (!alreadyCovered) clustered.push(candidate);
  }

  return clustered.sort((a, b) => a - b);
}
413
+
414
/**
 * Maps an x position to a column index given ascending column boundaries.
 *
 * Column i holds positions left of `boundaries[i]`; anything at or beyond the
 * last boundary falls in the final column (`boundaries.length`).
 *
 * @param {number} x - horizontal position of a text item.
 * @param {number[]} boundaries - boundary positions in ascending order.
 * @returns {number} the column index, or -1 when `x` is not a finite number.
 *   The caller in analyzePDFStructure skips items for which -1 is returned.
 */
function findColumnIndex(x, boundaries) {
  // Every comparison with NaN is false, so a broken coordinate would
  // otherwise be filed in the last column.
  if (!Number.isFinite(x)) return -1;

  const firstGreater = boundaries.findIndex((boundary) => x < boundary);
  return firstGreater === -1 ? boundaries.length : firstGreater;
}
420
// OCR Functions
/**
 * Runs Thai OCR (Tesseract, language 'tha') on what is currently previewed:
 * the preview image for image files, or the rendered canvas of the current
 * page for PDFs. Recognised text is split into lines, each line is split into
 * cells on runs of two or more whitespace characters, and the rows are stored
 * in state.extractedData and rendered.
 *
 * @returns {Promise<void>}
 */
async function performOCR() {
  if (state.isProcessing) return;

  let imageSource = null;

  if (state.fileType === 'image') {
    const img = document.getElementById('preview-image');
    if (img) imageSource = img.src;
  } else if (state.fileType === 'pdf') {
    const canvas = document.getElementById('pdf-canvas');
    if (canvas) imageSource = canvas.toDataURL('image/png');
  }

  if (!imageSource) {
    showToast('ไม่พบเอกสารสำหรับ OCR', 'error');
    return;
  }

  // Captured so a run whose file has been replaced does not publish results.
  const signal = state.abortController?.signal;

  state.isProcessing = true;
  showProcessingModal('กำลังตรวจจับข้อความ (OCR)', 'กำลังโหลด Engine...', 0);

  let worker = null;
  try {
    worker = await Tesseract.createWorker('tha', 1, {
      logger: (m) => {
        if (m.status === 'recognizing text') {
          updateProcessingProgress(`กำลังอ่านข้อความภาษาไทย... ${Math.round(m.progress * 100)}%`, m.progress * 100);
        } else if (m.status === 'loading language traineddata') {
          updateProcessingProgress('กำลังโหลดข้อมูลภาษาไทย...', 10);
        }
      },
      errorHandler: (err) => console.error('OCR Error:', err)
    });

    state.ocrWorker = worker;

    const result = await worker.recognize(imageSource);
    if (signal?.aborted) return;

    // Parse OCR result into structured data
    const lines = result.data.text.split('\n').filter((line) => line.trim());
    const parsed = lines
      // Split by multiple spaces to detect columns
      .map((line) => line.split(/\s{2,}/).map((cell) => cell.trim()).filter((cell) => cell))
      .filter((row) => row.length > 0);

    state.extractedData = parsed;
    // Rendering and export prefer coordinate rows whenever any exist, so rows
    // left over from an earlier PDF analysis would hide the OCR output.
    state.extractedDataWithCoords = [];
    renderResults();
    showToast(`OCR สำเร็จ พบข้อความ ${lines.length} บรรทัด`);
  } catch (error) {
    console.error('OCR Error:', error);
    if (error.name !== 'AbortError') {
      showToast('เกิดข้อผิดพลาดในการ OCR: ' + error.message, 'error');
    }
  } finally {
    try {
      if (worker) {
        await worker.terminate();
      }
    } catch (terminateError) {
      console.error('OCR Error:', terminateError);
    } finally {
      // Runs even if terminate() fails, so the app cannot get stuck in the
      // "processing" state with the modal open.
      if (state.ocrWorker === worker) {
        state.ocrWorker = null;
      }
      state.isProcessing = false;
      hideProcessingModal();
    }
  }
}
482
// Results Rendering
/**
 * Renders the extracted rows as cards in the results grid and refreshes the
 * row counter and the JSON preview.
 *
 * Coordinate rows (state.extractedDataWithCoords, shaped { page, cells }) are
 * shown when present; otherwise the plain rows (state.extractedData, arrays
 * of strings) are shown. Cards are built in batches of 50 with a zero-delay
 * timeout between batches so long documents do not block the UI.
 */
function renderResults() {
  const { extractedData, extractedDataWithCoords } = state;
  if ((!extractedData.length && !extractedDataWithCoords.length) || !elements.resultsSection) return;

  const dataToShow = extractedDataWithCoords.length ? extractedDataWithCoords : extractedData;
  // Count the rows of the array actually rendered; the two arrays need not
  // have the same length.
  const totalRows = dataToShow.length;

  // A later call invalidates batches still queued by an earlier one, which
  // would otherwise append their cards to the freshly cleared grid.
  const generation = (renderResults.generation ?? 0) + 1;
  renderResults.generation = generation;

  elements.resultsSection.classList.remove('hidden');
  elements.resultsGrid.innerHTML = '';

  if (elements.resultsStats) {
    elements.resultsStats.textContent = `พบข้อมูล ${totalRows} แถว`;
  }

  // Update JSON preview with coordinate data if available
  if (elements.jsonContent) {
    elements.jsonContent.textContent = JSON.stringify(dataToShow, null, 2);
  }

  const BATCH_SIZE = 50;

  // Creates an element with a class list and optional text. Text goes in via
  // textContent, so extracted document text is never parsed as HTML.
  const make = (tag, className, text) => {
    const node = document.createElement(tag);
    if (className) node.className = className;
    if (text !== undefined) node.textContent = text;
    return node;
  };

  // Builds one cell line: a column label plus the text, and the [x,y] origin
  // when the cell carries coordinates.
  const buildCell = (cell, colIndex) => {
    const isObject = typeof cell === 'object';
    const text = isObject ? cell.text : cell;
    const coords = isObject ? cell.coordinates : null;
    const label = `C${colIndex + 1}`;

    if (!isObject) {
      const line = make('div', 'text-sm text-slate-700 flex gap-2');
      line.appendChild(make('span', 'text-xs text-slate-400 font-mono w-6', label));
      line.appendChild(make('span', 'flex-1', String(text)));
      return line;
    }

    const line = make('div', 'text-sm text-slate-700 flex gap-2 items-start');
    line.appendChild(make('span', 'text-xs text-slate-400 font-mono w-6 shrink-0', label));

    const body = make('div', 'flex-1');
    const inner = make('div', 'flex justify-between items-start');
    inner.appendChild(make('span', '', String(text || '')));
    if (coords) {
      inner.appendChild(make('span', 'text-xs text-slate-400 font-mono', `[${coords.x},${coords.y}]`));
    }
    body.appendChild(inner);
    line.appendChild(body);
    return line;
  };

  // Builds the card for one row: header (row number + copy button) and cells.
  const buildCard = (row, rowIndex) => {
    const card = make('div', 'data-card bg-white rounded-xl p-4 shadow-sm hover:shadow-md transition-all');

    const header = make('div', 'flex justify-between items-center mb-3 pb-2 border-b border-slate-100');
    header.appendChild(make('span', 'text-xs font-bold text-blue-600 bg-blue-50 px-2 py-1 rounded', `แถวที่ ${rowIndex + 1}`));

    const copyBtn = make('button', 'text-slate-400 hover:text-blue-600 transition-colors');
    copyBtn.innerHTML = '<i data-feather="copy" class="w-4 h-4"></i>';
    copyBtn.onclick = () => copyRow(row, rowIndex);
    header.appendChild(copyBtn);

    // Rows are either plain arrays or { page, cells } objects. The object form
    // used to fall through to `row.forEach`, which threw for every row.
    let cells = [];
    if (Array.isArray(row)) {
      cells = row;
    } else if (row && Array.isArray(row.cells)) {
      cells = row.cells;
    }

    const content = make('div', 'space-y-2');
    cells.forEach((cell, colIndex) => {
      if (!cell) return;
      content.appendChild(buildCell(cell, colIndex));
    });

    card.appendChild(header);
    card.appendChild(content);
    return card;
  };

  const renderBatch = (startIndex) => {
    if (generation !== renderResults.generation) return;

    const endIndex = Math.min(startIndex + BATCH_SIZE, totalRows);
    const fragment = document.createDocumentFragment();
    for (let i = startIndex; i < endIndex; i++) {
      fragment.appendChild(buildCard(dataToShow[i], i));
    }
    elements.resultsGrid.appendChild(fragment);

    if (endIndex < totalRows) {
      setTimeout(() => renderBatch(endIndex), 0);
      return;
    }

    // All cards are in the DOM: swap in the icons, then bring results into view.
    feather.replace();

    // Scroll to results
    setTimeout(() => {
      elements.resultsSection.scrollIntoView({ behavior: 'smooth', block: 'start' });
    }, 100);
  };

  renderBatch(0);
}
598
// Export Functions
/**
 * Exports the current extraction in the requested format.
 *
 * Coordinate rows are exported when present, otherwise the plain rows. The
 * file name is "DocuAnalyza-" plus an ISO timestamp with colons replaced by
 * dashes. Shows an error toast when there is nothing to export; unknown
 * formats are ignored.
 *
 * @param {string} format - 'json', 'csv', 'excel' or 'html'.
 */
function exportData(format) {
  const withCoords = state.extractedDataWithCoords;
  const dataToExport = withCoords.length ? withCoords : state.extractedData;

  if (!dataToExport.length) {
    showToast('ไม่มีข้อมูลสำหรับ Export', 'error');
    return;
  }

  const isoSeconds = new Date().toISOString().slice(0, 19);
  const timestamp = isoSeconds.replace(/:/g, '-');
  const filename = `DocuAnalyza-${timestamp}`;

  const exporters = new Map([
    ['json', () => exportJSON(filename, dataToExport)],
    ['csv', () => exportCSV(filename, dataToExport)],
    ['excel', () => exportExcel(filename, dataToExport)],
    ['html', () => exportHTML(filename, dataToExport)],
  ]);

  const run = exporters.get(format);
  if (run) {
    run();
  }
}
625
/**
 * Downloads `data` as pretty-printed JSON (2-space indent).
 *
 * @param {string} filename - file name without extension.
 * @param {Array} data - rows to serialise.
 */
function exportJSON(filename, data) {
  const target = `${filename}.json`;
  const serialized = JSON.stringify(data, null, 2);
  downloadFile(serialized, target, 'application/json');
  showToast('ดาวน์โหลด JSON สำเร็จ');
}
630
/**
 * Downloads `data` as CSV. Every cell is double-quoted and embedded quotes
 * are doubled.
 *
 * The shape of each row is read from the row itself, not from global state:
 * a plain array is used as-is, and a { cells: [...] } row contributes each
 * cell's `text`. Rows of any other shape are skipped. Records are separated
 * by "\n" with no trailing newline, for both row shapes.
 *
 * @param {string} filename - file name without extension.
 * @param {Array<Array|{cells: Array}>} data - rows to export.
 */
function exportCSV(filename, data) {
  const quote = (value) => {
    const text = value === null || value === undefined ? '' : String(value);
    return `"${text.replace(/"/g, '""')}"`;
  };

  const cellsOf = (row) => {
    if (Array.isArray(row)) return row;
    if (row && Array.isArray(row.cells)) return row.cells;
    return null;
  };

  const records = [];
  for (const row of data) {
    const cells = cellsOf(row);
    if (!cells) continue;

    const fields = cells.map((cell) =>
      quote(cell && typeof cell === 'object' ? cell.text : cell)
    );
    records.push(fields.join(','));
  }

  downloadFile(records.join('\n'), `${filename}.csv`, 'text/csv');
  showToast('ดาวน์โหลด CSV สำเร็จ');
}
656
/**
 * Downloads `data` as an .xlsx workbook with one sheet, "Extracted Data".
 *
 * The shape of each row is read from the row itself: a plain array is passed
 * through unchanged, and a { cells: [...] } row is flattened to its cells'
 * `text` values. Column widths come from the longest value in each column
 * across all rows, plus 2 characters of padding.
 *
 * @param {string} filename - file name without extension.
 * @param {Array<Array|{cells: Array}>} data - rows to export.
 */
function exportExcel(filename, data) {
  const worksheetData = data.map((row) => {
    if (Array.isArray(row)) return row;
    if (row && Array.isArray(row.cells)) {
      return row.cells.map((cell) =>
        cell && typeof cell === 'object' ? cell.text || '' : ''
      );
    }
    return [];
  });

  const ws = XLSX.utils.aoa_to_sheet(worksheetData);
  const wb = XLSX.utils.book_new();
  XLSX.utils.book_append_sheet(wb, ws, "Extracted Data");

  // Auto-width columns. Rows can differ in length, so the widest row decides
  // how many columns get a width. A plain loop is used because spreading a
  // very large array into Math.max can exceed the engine's argument limit.
  const widths = [];
  for (const row of worksheetData) {
    for (let col = 0; col < row.length; col++) {
      const cell = row[col];
      const length = cell === null || cell === undefined ? 0 : String(cell).length;
      if (widths[col] === undefined || length > widths[col]) {
        widths[col] = length;
      }
    }
  }
  if (widths.length > 0) {
    ws['!cols'] = widths.map((width) => ({ wch: width + 2 }));
  }

  XLSX.writeFile(wb, `${filename}.xlsx`);
  showToast('ดาวน์โหลด Excel สำเร็จ');
}
692
/**
 * Downloads `data` as a standalone HTML document titled with the current
 * file's name.
 *
 * The shape of each row is read from the row itself: a plain array becomes
 * one line of space-separated cell text, and a { cells: [...] } row becomes a
 * block with one entry per cell, followed by its [x,y] origin when the cell
 * has coordinates. All text is passed through escapeHtml.
 *
 * @param {string} filename - file name without extension.
 * @param {Array<Array|{cells: Array}>} data - rows to export.
 */
function exportHTML(filename, data) {
  if (!data.length) {
    showToast('ไม่มีข้อมูลสำหรับ Export', 'error');
    return;
  }

  try {
    const renderCoordCell = (cell) => {
      // Cells without coordinates are still exported, minus the marker.
      const coords = cell.coordinates;
      const marker = coords
        ? `<span class="coord-info">[${escapeHtml(String(coords.x))},${escapeHtml(String(coords.y))}]</span>`
        : '';
      return `<div>
<span>${escapeHtml(cell.text || '')}</span>
${marker}
</div>`;
    };

    const renderRow = (row) => {
      if (Array.isArray(row)) {
        const line = row.map((cell) => escapeHtml(cell || '')).join(' ');
        return `<div style="margin-bottom: 8px;">${line}</div>`;
      }
      const cells = row && Array.isArray(row.cells) ? row.cells : [];
      const body = cells
        .filter((cell) => cell && typeof cell === 'object')
        .map(renderCoordCell)
        .join('');
      return `<div class="row">
${body}
</div>`;
    };

    const title = escapeHtml(state.currentFile?.name || 'เอกสาร');
    const rowsHtml = data.map(renderRow).join('');

    // One template serves both row shapes; the .row/.coord-info rules simply
    // go unused for plain rows.
    const htmlContent = `<!DOCTYPE html>
<html lang="th">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>${title}</title>
<style>
body {
  font-family: 'Sarabun', sans-serif;
  font-size: 11pt;
  line-height: 1.6;
  color: #000;
  background: white;
  margin: 0;
  padding: 20px;
}

.document-container {
  width: 100%;
  background: white;
  margin: 0 auto;
  padding: 0;
  box-sizing: border-box;
}

.content-section {
  margin: 0;
}

.row {
  margin-bottom: 12px;
  padding: 8px;
  border-bottom: 1px solid #eee;
}

.coord-info {
  font-size: 9pt;
  color: #666;
  font-family: monospace;
}
</style>
</head>
<body>
<div class="document-container">
<div class="content-section">
${rowsHtml}
</div>
</div>
</body>
</html>`;

    downloadFile(htmlContent, `${filename}.html`, 'text/html');
    showToast('ดาวน์โหลด HTML สำเร็จ');
  } catch (error) {
    console.error('HTML Export Error:', error);
    showToast('เกิดข้อผิดพลาดในการ Export', 'error');
  }
}
817
/**
 * Triggers a browser download of in-memory content.
 *
 * Wraps the content in a Blob, points a temporary <a download> element at an
 * object URL for it, and programmatically clicks the anchor.
 *
 * @param {BlobPart} content - Data to save (string, ArrayBuffer, Blob, ...).
 * @param {string} filename - Suggested name for the downloaded file.
 * @param {string} mimeType - MIME type without charset; ";charset=utf-8" is appended.
 * @returns {void}
 */
function downloadFile(content, filename, mimeType) {
    // Grace period before the object URL is released. Revoking it in the same
    // tick as click() can cancel the download in some browsers.
    const REVOKE_DELAY_MS = 1000;

    const blob = new Blob([content], { type: mimeType + ';charset=utf-8' });
    const url = URL.createObjectURL(blob);
    const a = document.createElement('a');
    a.href = url;
    a.download = filename;
    document.body.appendChild(a);
    try {
        a.click();
    } finally {
        // Always clean up, even if click() throws, so neither the anchor nor
        // the blob URL is leaked.
        document.body.removeChild(a);
        setTimeout(() => URL.revokeObjectURL(url), REVOKE_DELAY_MS);
    }
}
828
+ // Utility Functions
829
/**
 * Copies every extracted row to the clipboard as text, one row per line with
 * cells separated by " | ".
 *
 * Coordinate-annotated rows (state.extractedDataWithCoords) take precedence
 * over the plain rows (state.extractedData). Does nothing when the chosen
 * data set is empty. A toast reports success or failure.
 */
function copyAllData() {
    const hasCoordRows = Boolean(state.extractedDataWithCoords.length);
    const rows = hasCoordRows ? state.extractedDataWithCoords : state.extractedData;

    if (!rows.length) return;

    // Coordinate rows are objects carrying a `cells` array; plain rows are arrays.
    const formatCoordRow = (row) => {
        if (!row.cells) return '';
        return row.cells.map((cell) => cell.text || '').join(' | ');
    };
    const formatPlainRow = (row) => row.join(' | ');

    const lines = rows.map(hasCoordRows ? formatCoordRow : formatPlainRow);
    const text = lines.join('\n');

    const onCopied = () => {
        showToast('คัดลอกข้อมูลทั้งหมดแล้ว');
    };
    const onFailed = () => {
        showToast('ไม่สามารถคัดลอกได้', 'error');
    };

    navigator.clipboard.writeText(text).then(onCopied).catch(onFailed);
}
850
/**
 * Copies a single row to the clipboard with cells separated by " | ".
 *
 * Accepts either a plain row (array of cell values) or a coordinate-annotated
 * row object of the form { cells: [{ text }] }, the same two shapes that
 * copyAllData handles.
 *
 * @param {Array|{cells: Array<{text: string}>}} row - Row to copy.
 * @param {number} index - Zero-based row index; shown 1-based in the toast.
 * @returns {void}
 */
function copyRow(row, index) {
    const cells = Array.isArray(row?.cells)
        ? row.cells.map((cell) => cell.text || '')
        : row;
    const text = cells.join(' | ');

    navigator.clipboard.writeText(text).then(() => {
        showToast(`คัดลอกแถวที่ ${index + 1} แล้ว`);
    }).catch(() => {
        // Report clipboard failures (e.g. permission denied) instead of
        // leaving an unhandled promise rejection.
        showToast('ไม่สามารถคัดลอกได้', 'error');
    });
}
856
/**
 * Expands or collapses the JSON preview panel and rotates its chevron to
 * match: 180deg when the panel is shown, 0deg when it is hidden.
 * Does nothing when either element is missing.
 */
function toggleJsonPreview() {
    const preview = elements.jsonPreview;
    const chevron = elements.jsonChevron;
    if (!preview || !chevron) return;

    const wasHidden = preview.classList.contains('hidden');
    if (wasHidden) {
        preview.classList.remove('hidden');
    } else {
        preview.classList.add('hidden');
    }
    chevron.style.transform = wasHidden ? 'rotate(180deg)' : 'rotate(0deg)';
}
868
/**
 * Returns the application to its initial state: cancels in-flight work,
 * terminates the OCR worker, clears all file and extraction state, restores
 * the upload view, and confirms with a toast.
 */
function resetApplication() {
    // Stop anything still running before the state it relies on is cleared.
    if (state.abortController) {
        state.abortController.abort();
    }

    if (state.ocrWorker) {
        state.ocrWorker.terminate();
        state.ocrWorker = null;
    }

    Object.assign(state, {
        currentFile: null,
        fileType: null,
        pdfDoc: null,
        currentPage: 1,
        totalPages: 0,
        extractedData: [],
        extractedDataWithCoords: [],
        isProcessing: false,
        abortController: null,
    });

    // Show the upload view and hide/clear everything else; each element is optional.
    const {
        uploadSection,
        previewSection,
        resultsSection,
        previewContainer,
        pageSelector,
        jsonContent,
    } = elements;

    if (uploadSection) uploadSection.classList.remove('hidden');
    if (previewSection) previewSection.classList.add('hidden');
    if (resultsSection) resultsSection.classList.add('hidden');
    if (previewContainer) previewContainer.innerHTML = '';
    if (pageSelector) {
        pageSelector.innerHTML = '';
        pageSelector.classList.add('hidden');
    }
    if (jsonContent) jsonContent.textContent = '';

    showToast('รีเซ็ตระบบเรียบร้อย');
}
901
/**
 * Opens the processing modal with the given title, status line and progress.
 * Does nothing when the modal element is missing.
 *
 * @param {string} title - Heading text for the modal.
 * @param {string} status - Status line shown under the heading.
 * @param {number} percent - Progress value used for the bar width and label.
 */
function showProcessingModal(title, status, percent) {
    const modal = elements.processingModal;
    if (!modal) return;

    const titleEl = elements.processingTitle;
    if (titleEl) titleEl.textContent = title;

    // Status, bar width and percentage label are the same updates a progress tick makes.
    updateProcessingProgress(status, percent);

    modal.classList.remove('hidden');
}
910
/**
 * Refreshes the status line, progress-bar width and rounded percentage label
 * of the processing modal. Does nothing when the modal element is missing.
 *
 * @param {string} status - Status line to display.
 * @param {number} percent - Progress value; the label shows it rounded.
 */
function updateProcessingProgress(status, percent) {
    const { processingModal, processingStatus, progressBar, progressPercent } = elements;
    if (!processingModal) return;

    if (processingStatus) {
        processingStatus.textContent = status;
    }
    if (progressBar) {
        progressBar.style.width = `${percent}%`;
    }
    if (progressPercent) {
        progressPercent.textContent = `${Math.round(percent)}%`;
    }
}
917
/**
 * Hides the processing modal by adding the "hidden" class.
 * Does nothing when the modal element is missing.
 */
function hideProcessingModal() {
    const modal = elements.processingModal;
    if (!modal) return;
    modal.classList.add('hidden');
}
922
/**
 * Shows a transient toast notification that hides itself after three seconds.
 *
 * @param {string} message - Text to display.
 * @param {string} [type='success'] - 'error' shows a red alert icon; any
 *   other value shows a green check icon.
 * @returns {void}
 */
function showToast(message, type = 'success') {
    if (!elements.toast || !elements.toastMessage) return;

    elements.toastMessage.textContent = message;

    const isError = type === 'error';

    // feather.replace() swaps the <i data-feather> placeholder for an <svg>,
    // so on every call after the first the icon is no longer an <i>. Look for
    // either and rebuild a fresh placeholder for feather to render.
    // NOTE(review): assumes the first <i>/<svg> inside the toast is the status icon.
    const currentIcon = elements.toast.querySelector('i, svg');
    if (currentIcon) {
        const placeholder = document.createElement('i');
        // Keep the existing layout classes, dropping the ones feather adds itself.
        for (const cls of currentIcon.classList) {
            if (cls !== 'feather' && !cls.startsWith('feather-')) {
                placeholder.classList.add(cls);
            }
        }
        placeholder.classList.remove(isError ? 'text-green-400' : 'text-red-400');
        placeholder.classList.add(isError ? 'text-red-400' : 'text-green-400');
        placeholder.setAttribute('data-feather', isError ? 'alert-circle' : 'check-circle');
        currentIcon.replaceWith(placeholder);
    }

    feather.replace();

    elements.toast.classList.remove('opacity-0', 'translate-y-10', 'pointer-events-none');

    // Cancel the previous hide timer so a toast shown shortly after another
    // one still gets its full three seconds on screen.
    clearTimeout(showToast.hideTimer);
    showToast.hideTimer = setTimeout(() => {
        elements.toast.classList.add('opacity-0', 'translate-y-10', 'pointer-events-none');
    }, 3000);
}
946
/**
 * Escapes a value for safe interpolation into HTML markup.
 *
 * Replaces &, <, >, " and ' with character references, so the result is safe
 * in both element content and quoted attribute values. Works without a DOM.
 *
 * @param {*} text - Value to escape; null and undefined yield an empty
 *   string, anything else is converted with String().
 * @returns {string} The escaped text.
 */
function escapeHtml(text) {
    if (text == null) return '';

    const entities = {
        '&': '&amp;',
        '<': '&lt;',
        '>': '&gt;',
        '"': '&quot;',
        "'": '&#39;',
    };
    return String(text).replace(/[&<>"']/g, (ch) => entities[ch]);
}