/**
 * Main Alpine.js application for OCR Text Explorer.
 *
 * Registers the `ocrExplorer` component, which pages through the rows of a
 * dataset and shows the original OCR text, the improved OCR text, a diff of
 * the two and the page image. Two thumbnail strips (the "flow" view and the
 * "dock") offer an alternative way to navigate.
 *
 * Relies on names that must be provided by other scripts on the page:
 * `Alpine`, `DatasetAPI`, `createCharacterDiff`, `createWordDiff`,
 * `createLineDiff`, `calculateSimilarity`, `computeLCS` and `buildDiff`.
 */
document.addEventListener('alpine:init', () => {
    Alpine.data('ocrExplorer', () => ({
        // Dataset state
        datasetId: 'davanstrien/exams-ocr',
        datasetConfig: 'default',
        datasetSplit: 'train',

        // Navigation state
        currentIndex: 0,
        totalSamples: null,
        currentSample: null,
        jumpToPage: '',

        // UI state
        loading: false,
        error: null,
        activeTab: 'comparison',
        diffMode: 'char',
        darkMode: false,
        showAbout: false,
        showFlowView: false,
        showDock: false,

        // Flow view state
        flowItems: [],
        flowStartIndex: 0,
        flowVisibleCount: 7,
        flowOffset: 0,

        // Dock state
        dockItems: [],
        dockHideTimeout: null,
        dockStartIndex: 0,
        dockVisibleCount: 10,

        // Computed diff HTML
        diffHtml: '',

        // Statistics
        similarity: 0,
        charStats: { total: 0, added: 0, removed: 0 },
        wordStats: { original: 0, improved: 0 },

        // API instance
        api: null,

        // Counters identifying the most recent request of each kind. A
        // response whose id no longer matches belongs to a superseded
        // request and is discarded.
        sampleRequestId: 0,
        flowRequestId: 0,
        dockRequestId: 0,

        /**
         * Component entry point: creates the API client, restores the dark
         * mode preference, installs watchers and keyboard shortcuts, then
         * loads the dataset.
         */
        async init() {
            this.api = new DatasetAPI();

            // Apply dark mode from localStorage and keep both the stored
            // value and the <html> class in sync with later changes.
            this.darkMode = localStorage.getItem('darkMode') === 'true';
            this.$watch('darkMode', (value) => {
                localStorage.setItem('darkMode', value);
                document.documentElement.classList.toggle('dark', value);
            });
            document.documentElement.classList.toggle('dark', this.darkMode);

            this.initWatchers();
            this.setupKeyboardNavigation();

            await this.loadDataset();
        },

        /**
         * Installs the document-level keyboard shortcuts:
         * ArrowLeft/K previous, ArrowRight/J next, Shift+Arrow scrolls the
         * dock while it is shown, 1/2/3 select a tab, V toggles the dock.
         */
        setupKeyboardNavigation() {
            document.addEventListener('keydown', (e) => {
                // Leave browser/OS shortcuts (Ctrl+V, Cmd+K, Alt+Left, ...) alone.
                if (e.ctrlKey || e.metaKey || e.altKey) return;

                // Ignore keystrokes while the user is typing or editing.
                const target = e.target;
                const tag = target?.tagName;
                if (
                    tag === 'INPUT' ||
                    tag === 'TEXTAREA' ||
                    tag === 'SELECT' ||
                    target?.isContentEditable
                ) {
                    return;
                }

                switch (e.key) {
                    case 'ArrowLeft':
                        e.preventDefault();
                        if (e.shiftKey && this.showDock) {
                            this.scrollDockLeft();
                        } else {
                            this.previousSample();
                        }
                        break;
                    case 'ArrowRight':
                        e.preventDefault();
                        if (e.shiftKey && this.showDock) {
                            this.scrollDockRight();
                        } else {
                            this.nextSample();
                        }
                        break;
                    case 'k':
                    case 'K':
                        e.preventDefault();
                        this.previousSample();
                        break;
                    case 'j':
                    case 'J':
                        e.preventDefault();
                        this.nextSample();
                        break;
                    case '1':
                        this.activeTab = 'comparison';
                        break;
                    case '2':
                        this.activeTab = 'diff';
                        break;
                    case '3':
                        this.activeTab = 'improved';
                        break;
                    case 'v':
                    case 'V':
                        // Toggle dock with V key
                        if (this.showDock) {
                            this.hideDockPreview();
                        } else {
                            this.showDockPreview();
                        }
                        break;
                }
            });
        },

        /**
         * Validates `datasetId`, resolves its default config and split,
         * reads the row count and loads the first sample. Any failure is
         * reported through `error`; `loading` is true for the duration.
         */
        async loadDataset() {
            this.loading = true;
            this.error = null;

            try {
                await this.api.validateDataset(this.datasetId);

                const info = await this.api.getDatasetInfo(this.datasetId);
                this.datasetConfig = info.defaultConfig;
                this.datasetSplit = info.defaultSplit;

                this.totalSamples = await this.api.getTotalRows(
                    this.datasetId,
                    this.datasetConfig,
                    this.datasetSplit
                );

                this.currentIndex = 0;
                await this.loadSample(0);
            } catch (error) {
                this.error = error.message;
            } finally {
                this.loading = false;
            }
        },

        /**
         * Loads the row at `index`, makes it the current sample, refreshes
         * the diff and mirrors the position into the URL query string.
         * Failures are reported through `error` rather than thrown.
         *
         * @param {number} index - Zero-based row index.
         */
        async loadSample(index) {
            const requestId = ++this.sampleRequestId;

            try {
                const data = await this.api.getRow(
                    this.datasetId,
                    this.datasetConfig,
                    this.datasetSplit,
                    index
                );

                // A newer loadSample() call started while this one was in
                // flight; applying this response would show the wrong page.
                if (requestId !== this.sampleRequestId) return;

                this.currentSample = data.row;
                this.currentIndex = index;
                // Drop the message left behind by an earlier failed load.
                this.error = null;

                this.updateDiff();

                // Update URL without triggering navigation
                const url = new URL(window.location);
                url.searchParams.set('dataset', this.datasetId);
                url.searchParams.set('index', index);
                window.history.replaceState({}, '', url);
            } catch (error) {
                if (requestId !== this.sampleRequestId) return;
                this.error = `Failed to load sample: ${error.message}`;
            }
        },

        /** Moves to the next sample unless already on the last one. */
        async nextSample() {
            if (this.currentIndex < this.totalSamples - 1) {
                await this.loadSample(this.currentIndex + 1);
            }
        },

        /** Moves to the previous sample unless already on the first one. */
        async previousSample() {
            if (this.currentIndex > 0) {
                await this.loadSample(this.currentIndex - 1);
            }
        },

        /**
         * Jumps to the 1-based page number held in `jumpToPage` when it is
         * within range, then clears the field whether or not it was valid.
         */
        async jumpToSample() {
            const pageNum = Number.parseInt(this.jumpToPage, 10);
            const isValidPage =
                !Number.isNaN(pageNum) && pageNum >= 1 && pageNum <= this.totalSamples;

            if (isValidPage) {
                // Convert 1-based page number to 0-based index
                await this.loadSample(pageNum - 1);
            }
            this.jumpToPage = '';
        },

        /**
         * @returns {string} The current sample's original text, a
         *   placeholder when that column is empty, or '' with no sample.
         */
        getOriginalText() {
            if (!this.currentSample) return '';
            const columns = this.api.detectColumns(null, this.currentSample);
            return this.currentSample[columns.originalText] || 'No original text found';
        },

        /**
         * @returns {string} The current sample's improved text, a
         *   placeholder when that column is empty, or '' with no sample.
         */
        getImprovedText() {
            if (!this.currentSample) return '';
            const columns = this.api.detectColumns(null, this.currentSample);
            return this.currentSample[columns.improvedText] || 'No improved text found';
        },

        /**
         * @returns {*} The value of the detected image column, or null when
         *   there is no sample or no image column was detected.
         */
        getImageData() {
            if (!this.currentSample) return null;
            const columns = this.api.detectColumns(null, this.currentSample);
            return columns.image ? this.currentSample[columns.image] : null;
        },

        /** @returns {string} The current image's `src`, or '' if unavailable. */
        getImageSrc() {
            const imageData = this.getImageData();
            return imageData?.src || '';
        },

        /**
         * @returns {?string} "width×height" for the current image, or null
         *   when either dimension is missing.
         */
        getImageDimensions() {
            const imageData = this.getImageData();
            if (imageData?.width && imageData?.height) {
                return `${imageData.width}×${imageData.height}`;
            }
            return null;
        },

        /**
         * Recomputes the statistics and the diff HTML for the current
         * sample using the granularity selected in `diffMode`.
         */
        updateDiff() {
            const original = this.getOriginalText();
            const improved = this.getImprovedText();

            this.calculateStatistics(original, improved);

            switch (this.diffMode) {
                case 'char':
                    this.diffHtml = createCharacterDiff(original, improved);
                    break;
                case 'word':
                    this.diffHtml = createWordDiff(original, improved);
                    break;
                case 'line':
                    this.diffHtml = createLineDiff(original, improved);
                    break;
            }
        },

        /**
         * Updates `similarity`, `charStats` and `wordStats` for a text pair.
         *
         * @param {string} original - Original OCR text.
         * @param {string} improved - Improved OCR text.
         */
        calculateStatistics(original, improved) {
            this.similarity = calculateSimilarity(original, improved);
            this.charStats = this.getCharacterDiffStats(original, improved);

            // Splitting on whitespace yields empty strings at the edges of
            // padded text; those are not words.
            const countWords = (text) =>
                text.split(/\s+/).filter((word) => word.length > 0).length;
            this.wordStats = {
                original: countWords(original),
                improved: countWords(improved)
            };
        },

        /**
         * Tallies inserted, deleted and unchanged characters from the diff
         * of the two texts.
         *
         * @param {string} original - Original OCR text.
         * @param {string} improved - Improved OCR text.
         * @returns {{total: number, added: number, removed: number, unchanged: number}}
         *   `total` is the length of `original`.
         */
        getCharacterDiffStats(original, improved) {
            const dp = computeLCS(original, improved);
            const diff = buildDiff(original, improved, dp);

            let added = 0;
            let removed = 0;
            let unchanged = 0;

            for (const part of diff) {
                if (part.type === 'insert') {
                    added += part.value.length;
                } else if (part.type === 'delete') {
                    removed += part.value.length;
                } else {
                    unchanged += part.value.length;
                }
            }

            return {
                total: original.length,
                added: added,
                removed: removed,
                unchanged: unchanged
            };
        },

        /**
         * Error handler for the main image: asks the API for a refreshed
         * image URL for the current row and applies it to the element.
         *
         * @param {Event} event - The `error` event of the <img> element.
         */
        async handleImageError(event) {
            const img = event.target;
            const index = this.currentIndex;

            // If the source that just failed is the one the previous refresh
            // produced (or the empty placeholder set after a failed
            // refresh), another attempt would only repeat the failure and
            // fire this handler again without end.
            const failedSrc = img.getAttribute('src') ?? '';
            if (img.dataset.lastRefreshResult === failedSrc) return;

            console.log('Image failed to load, refreshing URL...');

            try {
                const data = await this.api.refreshImageUrl(
                    this.datasetId,
                    this.datasetConfig,
                    this.datasetSplit,
                    index
                );

                // The user moved to another sample while the refresh was in
                // flight; this URL no longer belongs to the displayed image.
                if (index !== this.currentIndex) return;

                const columns = data.row ? this.api.detectColumns(null, data.row) : null;
                const refreshedSrc = columns ? data.row[columns.image]?.src : null;
                if (refreshedSrc) {
                    img.dataset.lastRefreshResult = refreshedSrc;
                    img.src = refreshedSrc;
                }
            } catch (error) {
                console.error('Failed to refresh image URL:', error);
                // Set a placeholder image
                img.dataset.lastRefreshResult = '';
                img.src = '';
            }
        },

        /**
         * Downloads a plain-text report with both versions of the current
         * page and its statistics.
         */
        exportComparison() {
            const original = this.getOriginalText();
            const improved = this.getImprovedText();

            const metadata = {
                dataset: this.datasetId,
                page: this.currentIndex + 1,
                totalPages: this.totalSamples,
                similarity: `${this.similarity}%`,
                statistics: {
                    characters: this.charStats,
                    words: this.wordStats
                }
            };
            const { characters, words } = metadata.statistics;
            const rule = '='.repeat(50);

            // Create export content
            const content = [
                `OCR Text Comparison Export\n`,
                `==========================\n\n`,
                `Dataset: ${metadata.dataset}\n`,
                `Page: ${metadata.page} of ${metadata.totalPages}\n`,
                `Export Date: ${new Date().toLocaleString()}\n`,
                `Similarity: ${metadata.similarity}\n`,
                `Characters: ${characters.total} total, `,
                `${characters.added} added, `,
                `${characters.removed} removed\n`,
                `Words: ${words.original} → ${words.improved}\n`,
                `\n${rule}\n\n`,
                `ORIGINAL OCR:\n`,
                `${rule}\n`,
                original,
                `\n\n${rule}\n\n`,
                `IMPROVED OCR:\n`,
                `${rule}\n`,
                improved
            ].join('');

            // Download file through a temporary link element.
            const blob = new Blob([content], { type: 'text/plain' });
            const url = URL.createObjectURL(blob);
            const a = document.createElement('a');
            a.href = url;
            // Replace every "/" so the id cannot introduce path separators
            // into the file name.
            a.download = `ocr-comparison-${this.datasetId.replace(/\//g, '-')}-page-${this.currentIndex + 1}.txt`;
            document.body.appendChild(a);
            a.click();
            document.body.removeChild(a);
            URL.revokeObjectURL(url);
        },

        // Flow view methods

        /**
         * Shows or hides the flow view. On opening, the window of
         * thumbnails is centred on the current page.
         */
        async toggleFlowView() {
            this.showFlowView = !this.showFlowView;
            if (this.showFlowView) {
                this.flowStartIndex = Math.max(
                    0,
                    this.currentIndex - Math.floor(this.flowVisibleCount / 2)
                );
                await this.loadFlowItems();
            }
        },

        /**
         * Rebuilds `flowItems` with up to `flowVisibleCount` thumbnails
         * starting at `flowStartIndex`. Rows that fail to load are logged
         * and skipped.
         */
        async loadFlowItems() {
            const requestId = ++this.flowRequestId;
            const startIdx = this.flowStartIndex;
            this.flowItems = [];

            for (let i = 0; i < this.flowVisibleCount && (startIdx + i) < this.totalSamples; i++) {
                const idx = startIdx + i;
                try {
                    const data = await this.api.getRow(
                        this.datasetId,
                        this.datasetConfig,
                        this.datasetSplit,
                        idx
                    );

                    // A newer call has reset the list; stop so this run does
                    // not mix its thumbnails into the newer window.
                    if (requestId !== this.flowRequestId) return;

                    const columns = this.api.detectColumns(null, data.row);
                    const imageData = columns.image ? data.row[columns.image] : null;

                    this.flowItems.push({
                        index: idx,
                        imageSrc: imageData?.src || '',
                        row: data.row
                    });
                } catch (error) {
                    if (requestId !== this.flowRequestId) return;
                    console.error(`Failed to load flow item ${idx}:`, error);
                }
            }
        },

        /**
         * Scrolls the flow view one window to the left.
         *
         * @returns {Promise<void>|undefined} Settles once the thumbnails
         *   are reloaded; undefined when already at the start.
         */
        scrollFlowLeft() {
            if (this.flowStartIndex > 0) {
                this.flowStartIndex = Math.max(0, this.flowStartIndex - this.flowVisibleCount);
                return this.loadFlowItems();
            }
            return undefined;
        },

        /**
         * Scrolls the flow view one window to the right.
         *
         * @returns {Promise<void>|undefined} Settles once the thumbnails
         *   are reloaded; undefined when already at the end.
         */
        scrollFlowRight() {
            if (this.flowStartIndex < this.totalSamples - this.flowVisibleCount) {
                this.flowStartIndex = Math.min(
                    this.totalSamples - this.flowVisibleCount,
                    this.flowStartIndex + this.flowVisibleCount
                );
                return this.loadFlowItems();
            }
            return undefined;
        },

        /**
         * Closes the flow view and loads the chosen page.
         *
         * @param {number} index - Zero-based row index.
         */
        async jumpToFlowPage(index) {
            this.showFlowView = false;
            await this.loadSample(index);
        },

        /**
         * Error handler for a flow thumbnail: asks the API for a refreshed
         * image URL and applies it to the element and the flow item.
         *
         * @param {Event} event - The `error` event of the <img> element.
         * @param {number} index - Zero-based row index of the thumbnail.
         */
        async handleFlowImageError(event, index) {
            const img = event.target;

            // Do not retry a source that the previous refresh produced;
            // it would fail the same way and fire this handler again.
            const failedSrc = img.getAttribute('src') ?? '';
            if (img.dataset.lastRefreshResult === failedSrc) return;

            try {
                const data = await this.api.refreshImageUrl(
                    this.datasetId,
                    this.datasetConfig,
                    this.datasetSplit,
                    index
                );

                if (data.row) {
                    const columns = this.api.detectColumns(null, data.row);
                    const imageData = columns.image ? data.row[columns.image] : null;
                    if (imageData?.src) {
                        img.dataset.lastRefreshResult = imageData.src;
                        img.src = imageData.src;
                        // Update the flow item
                        const flowItem = this.flowItems.find((item) => item.index === index);
                        if (flowItem) {
                            flowItem.imageSrc = imageData.src;
                        }
                    }
                }
            } catch (error) {
                console.error('Failed to refresh flow image URL:', error);
            }
        },

        // Dock methods

        /**
         * Shows the dock, cancelling any pending hide, and reloads its
         * thumbnails centred on the current page.
         */
        async showDockPreview() {
            if (this.dockHideTimeout) {
                clearTimeout(this.dockHideTimeout);
                this.dockHideTimeout = null;
            }

            this.showDock = true;

            // Centre on the current page, clamped so the window stays
            // within [0, totalSamples - dockVisibleCount].
            this.dockStartIndex = Math.max(0,
                Math.min(
                    this.currentIndex - Math.floor(this.dockVisibleCount / 2),
                    this.totalSamples - this.dockVisibleCount
                )
            );

            // Always reload dock items to show current position
            await this.loadDockItems();
        },

        /** Hides the dock after a short delay to prevent flickering. */
        hideDockPreview() {
            // Replace, rather than stack, a hide that is already pending.
            if (this.dockHideTimeout) {
                clearTimeout(this.dockHideTimeout);
            }
            this.dockHideTimeout = setTimeout(() => {
                this.showDock = false;
                this.dockHideTimeout = null;
            }, 300);
        },

        /**
         * Rebuilds `dockItems` with up to `dockVisibleCount` thumbnails
         * starting at `dockStartIndex`. Rows that fail to load are logged
         * and skipped.
         */
        async loadDockItems() {
            const requestId = ++this.dockRequestId;
            const endIdx = Math.min(this.totalSamples, this.dockStartIndex + this.dockVisibleCount);
            this.dockItems = [];

            for (let i = this.dockStartIndex; i < endIdx; i++) {
                try {
                    const data = await this.api.getRow(
                        this.datasetId,
                        this.datasetConfig,
                        this.datasetSplit,
                        i
                    );

                    // A newer call has reset the list; stop so this run does
                    // not mix its thumbnails into the newer window.
                    if (requestId !== this.dockRequestId) return;

                    const columns = this.api.detectColumns(null, data.row);
                    const imageData = columns.image ? data.row[columns.image] : null;

                    this.dockItems.push({
                        index: i,
                        imageSrc: imageData?.src || '',
                        row: data.row
                    });
                } catch (error) {
                    if (requestId !== this.dockRequestId) return;
                    console.error(`Failed to load dock item ${i}:`, error);
                }
            }
        },

        /** Scrolls the dock half a window to the left. */
        async scrollDockLeft() {
            if (this.dockStartIndex > 0) {
                this.dockStartIndex = Math.max(
                    0,
                    this.dockStartIndex - Math.floor(this.dockVisibleCount / 2)
                );
                await this.loadDockItems();
            }
        },

        /** Scrolls the dock half a window to the right. */
        async scrollDockRight() {
            if (this.dockStartIndex < this.totalSamples - this.dockVisibleCount) {
                this.dockStartIndex = Math.min(
                    this.totalSamples - this.dockVisibleCount,
                    this.dockStartIndex + Math.floor(this.dockVisibleCount / 2)
                );
                await this.loadDockItems();
            }
        },

        /**
         * Closes the dock and loads the chosen page.
         *
         * @param {number} index - Zero-based row index.
         */
        async jumpToDockPage(index) {
            this.showDock = false;
            await this.loadSample(index);
        },

        /**
         * Re-renders the diff whenever `diffMode` changes. Called from
         * init(): this component is registered with Alpine.data, so it
         * cannot be reached through Alpine.store after start-up.
         * `currentSample` needs no watcher because loadSample() calls
         * updateDiff() itself.
         */
        initWatchers() {
            this.$watch('diffMode', () => this.updateDiff());
        }
    }));
});