// Spaces:
// Running
// Running
/**
 * RAG Axiom Extractor - Main Application Logic
 * Handles UI, document processing, and worker communication
 */
class RAGApplication {
    /**
     * Sets up empty in-memory state and immediately calls init(), which
     * spawns the worker and binds DOM listeners.
     */
    constructor() {
        this.worker = null;
        // docId -> { id, name, size, content, chunks, uploadedAt }
        this.documents = new Map();
        this.axioms = [];
        // docId -> embeddings exactly as delivered by the worker
        this.embeddings = new Map();
        this.isInitialized = false;
        // Monotonic counter used to build unique query-embedding request ids.
        this.queryCounter = 0;
        this.init();
    }

    /**
     * Creates the web worker, wires its handlers, binds UI listeners and
     * restores any data previously written to localStorage.
     */
    init() {
        // Initialize web worker
        this.worker = new Worker('worker.js');
        // Set up worker message handlers
        this.worker.onmessage = (e) => this.handleWorkerMessage(e);
        this.worker.onerror = (e) => this.handleWorkerError(e);
        // Set up UI event listeners
        this.setupEventListeners();
        // Load saved data from localStorage
        this.loadSavedData();
    }

    /**
     * Binds file-upload, drag-and-drop, generation and action-button listeners.
     * Expects the referenced element ids to exist in the page.
     */
    setupEventListeners() {
        // File upload
        const fileInput = document.getElementById('fileInput');
        const uploadArea = document.getElementById('uploadArea');
        fileInput.addEventListener('change', (e) => this.handleFileUpload(e.target.files));
        // Drag and drop
        uploadArea.addEventListener('dragover', (e) => {
            e.preventDefault();
            uploadArea.classList.add('dragover');
        });
        uploadArea.addEventListener('dragleave', () => {
            uploadArea.classList.remove('dragover');
        });
        uploadArea.addEventListener('drop', (e) => {
            e.preventDefault();
            uploadArea.classList.remove('dragover');
            this.handleFileUpload(e.dataTransfer.files);
        });
        // Generation
        document.getElementById('generateBtn').addEventListener('click', () => this.generateResponse());
        // `keydown` instead of the deprecated `keypress`, which is unreliable
        // for modified keys; Cmd+Enter is accepted alongside Ctrl+Enter.
        document.getElementById('queryInput').addEventListener('keydown', (e) => {
            if (e.key === 'Enter' && (e.ctrlKey || e.metaKey)) {
                this.generateResponse();
            }
        });
        // Actions
        document.getElementById('clearAllBtn').addEventListener('click', () => this.clearAllData());
        document.getElementById('exportBtn').addEventListener('click', () => this.exportAxioms());
    }

    /**
     * Dispatches a worker message to the matching handler based on its `type`.
     * Unknown types (including 'queryEmbeddingComplete', which is consumed by
     * the per-request listener in getQueryEmbedding) are ignored here.
     * @param {MessageEvent} e - Message whose data is `{ type, payload }`.
     */
    handleWorkerMessage(e) {
        const { type, payload } = e.data || {};
        switch (type) {
            case 'progress':
                this.updateProgress(payload);
                break;
            case 'status':
                this.updateStatus(payload.model, payload.status);
                break;
            case 'ready':
                this.isInitialized = true;
                this.updateUI();
                break;
            case 'embeddingComplete':
                this.handleEmbeddingComplete(payload);
                break;
            case 'axiomExtractionComplete':
                this.handleAxiomExtractionComplete(payload);
                break;
            case 'generationComplete':
                this.handleGenerationComplete(payload);
                break;
            case 'error':
                this.handleError(payload && payload.error);
                break;
            default:
                break;
        }
    }

    /**
     * Logs an uncaught worker error and surfaces a generic notification.
     * @param {ErrorEvent} error - Event passed to `worker.onerror`.
     */
    handleWorkerError(error) {
        console.error('Worker error:', error);
        this.showNotification('Worker error occurred. Check console for details.', 'error');
    }

    /**
     * Reveals the progress UI and renders the current download progress.
     * @param {{progress: number, loaded?: number, total?: number}} payload
     *   `progress` is treated as a 0..1 fraction and clamped to that range.
     *   NOTE(review): confirm the worker really reports a fraction, not 0..100.
     */
    updateProgress({ progress, loaded, total }) {
        const progressContainer = document.getElementById('progressContainer');
        const progressBar = document.getElementById('progressBar');
        const progressText = document.getElementById('progressText');
        const fraction = Number.isFinite(progress) ? Math.min(Math.max(progress, 0), 1) : 0;
        const percent = Math.round(fraction * 100);
        const toMegabytes = (bytes) => (bytes / 1024 / 1024).toFixed(1);

        progressContainer.classList.remove('hidden');
        progressBar.style.width = `${fraction * 100}%`;
        // Byte counts are only shown when both are real numbers, so the label
        // never reads "NaNMB".
        progressText.textContent = Number.isFinite(loaded) && Number.isFinite(total)
            ? `${percent}% (${toMegabytes(loaded)}MB / ${toMegabytes(total)}MB)`
            : `${percent}%`;
    }

    /**
     * Writes a model status line; a status containing '✅' is styled as ready.
     * @param {string} model - 'embedding' targets the embedding status element;
     *   any other value targets the generation status element.
     * @param {string} status - Text to display.
     */
    updateStatus(model, status) {
        const statusElement = model === 'embedding'
            ? document.getElementById('embeddingStatus')
            : document.getElementById('generationStatus');
        const statusText = String(status);
        statusElement.textContent = statusText;
        statusElement.className = `status ${statusText.includes('✅') ? 'ready' : 'pending'}`;
    }

    /**
     * Reads each file as text, stores it as a chunked document and asks the
     * worker to embed the chunks. A failure on one file is reported and does
     * not stop the remaining files.
     * @param {FileList|File[]} files - Files from the input or a drop event.
     * @returns {Promise<void>}
     */
    async handleFileUpload(files) {
        if (!this.isInitialized) {
            this.showNotification('Please wait for models to finish loading.', 'warning');
            return;
        }
        const uploadStatus = document.getElementById('uploadStatus');
        uploadStatus.innerHTML = '<span class="loading">Processing files...</span>';
        let processedCount = 0;
        for (const file of files) {
            try {
                const text = await this.readFile(file);
                const docId = `${file.name}-${Date.now()}`;
                const chunks = this.chunkText(text);
                // Store document
                this.documents.set(docId, {
                    id: docId,
                    name: file.name,
                    size: file.size,
                    content: text,
                    chunks,
                    uploadedAt: new Date().toISOString()
                });
                // Generate embeddings for chunks
                this.worker.postMessage({
                    type: 'generateEmbeddings',
                    payload: { docId, chunks }
                });
                processedCount += 1;
            } catch (error) {
                console.error(`Error processing ${file.name}:`, error);
                this.showNotification(`Error processing ${file.name}`, 'error');
            }
        }
        if (processedCount === 0) {
            // Nothing succeeded, so do not claim success.
            uploadStatus.innerHTML = '';
            return;
        }
        uploadStatus.innerHTML = '<span class="success">Files processed! Generating embeddings...</span>';
        setTimeout(() => {
            uploadStatus.innerHTML = '';
        }, 3000);
        this.saveData();
    }

    /**
     * Reads a file as text.
     * @param {File} file
     * @returns {Promise<string>} Resolves with the file contents; rejects with
     *   the reader's error when reading fails.
     */
    readFile(file) {
        return new Promise((resolve, reject) => {
            const reader = new FileReader();
            reader.onload = (e) => resolve(e.target.result);
            reader.onerror = () => reject(reader.error);
            reader.readAsText(file);
        });
    }

    /**
     * Splits text into overlapping windows of whitespace-separated words.
     * Windows whose joined text is 50 characters or shorter are dropped.
     * @param {string} text - Source text.
     * @param {number} [chunkSize=200] - Maximum words per chunk.
     * @param {number} [overlap=50] - Words shared between consecutive chunks.
     * @returns {string[]} Chunks in document order.
     */
    chunkText(text, chunkSize = 200, overlap = 50) {
        // Trim first so leading whitespace does not yield an empty first word.
        const words = (text == null ? '' : String(text)).trim().split(/\s+/);
        // A non-positive step (overlap >= chunkSize) would never terminate.
        const step = Math.max(1, chunkSize - overlap);
        const chunks = [];
        for (let start = 0; start < words.length; start += step) {
            const chunk = words.slice(start, start + chunkSize).join(' ');
            if (chunk.length > 50) {
                chunks.push(chunk);
            }
        }
        return chunks;
    }

    /**
     * Stores embeddings returned by the worker, then starts axiom extraction
     * and refreshes the document list.
     * @param {{docId: string, embeddings: Array}} payload
     */
    handleEmbeddingComplete({ docId, embeddings }) {
        // The document may have been removed while the worker was busy;
        // keeping its embeddings would break later retrieval.
        if (!this.documents.has(docId)) return;
        this.embeddings.set(docId, embeddings);
        this.extractAxioms(docId);
        this.updateDocumentList();
    }

    /**
     * Asks the worker to extract axioms from a stored document's full text.
     * Does nothing when the document is unknown.
     * @param {string} docId
     */
    extractAxioms(docId) {
        const doc = this.documents.get(docId);
        if (!doc) return;
        this.worker.postMessage({
            type: 'extractAxioms',
            payload: {
                docId,
                text: doc.content
            }
        });
    }

    /**
     * Appends newly extracted axioms, re-renders the list and persists state.
     * @param {{docId?: string, axioms: Array<Object>}} payload
     *   When the payload carries a `docId`, axioms lacking their own `docId`
     *   are tagged with it so removeDocument() can filter them out later, and
     *   results for a document that no longer exists are discarded.
     *   NOTE(review): whether the worker echoes `docId` is not visible here.
     */
    handleAxiomExtractionComplete({ docId, axioms }) {
        if (docId !== undefined && !this.documents.has(docId)) return;
        const incoming = Array.isArray(axioms) ? axioms : [];
        for (const axiom of incoming) {
            const needsTag = docId !== undefined
                && axiom !== null
                && typeof axiom === 'object'
                && axiom.docId === undefined;
            this.axioms.push(needsTag ? { ...axiom, docId } : axiom);
        }
        this.updateAxiomList();
        this.saveData();
    }

    /**
     * Builds a context from retrieved chunks and matching axioms (depending on
     * the two checkboxes) and sends a generation request to the worker.
     * Any failure while preparing the request is routed to handleError(), which
     * re-enables the button.
     * @returns {Promise<void>}
     */
    async generateResponse() {
        const query = document.getElementById('queryInput').value.trim();
        if (!query) {
            this.showNotification('Please enter a query.', 'warning');
            return;
        }
        if (!this.isInitialized) {
            this.showNotification('Models are still loading.', 'warning');
            return;
        }
        const useAxioms = document.getElementById('useAxioms').checked;
        const useContext = document.getElementById('useContext').checked;
        // Show loading state
        const generateBtn = document.getElementById('generateBtn');
        const statusIndicator = document.getElementById('generationStatusIndicator');
        generateBtn.disabled = true;
        generateBtn.textContent = 'Generating...';
        statusIndicator.innerHTML = '<span class="loading">Retrieving relevant context...</span>';
        try {
            // Prepare context
            let context = '';
            let retrievedChunks = [];
            if (useContext && this.embeddings.size > 0) {
                // Retrieve relevant chunks
                retrievedChunks = await this.retrieveRelevantChunks(query, 5);
                context = retrievedChunks.map((c) => c.text).join('\n\n');
            }
            if (useAxioms && this.axioms.length > 0) {
                // Add axioms to context
                const relevantAxioms = this.getRelevantAxioms(query);
                if (relevantAxioms.length > 0) {
                    context += '\n\n=== EXTRACTED AXIOMS ===\n';
                    context += relevantAxioms.map((a) => `• ${a.text}`).join('\n');
                }
            }
            // Show retrieved context
            this.updateRetrievedContext(retrievedChunks);
            statusIndicator.innerHTML = '<span class="loading">Generating response...</span>';
            // Send generation request to worker
            this.worker.postMessage({
                type: 'generate',
                payload: {
                    query,
                    context,
                    maxLength: 512
                }
            });
        } catch (error) {
            // Without this the button would stay disabled after a failure.
            this.handleError(error);
        }
    }

    /**
     * Scores every stored chunk against the query by cosine similarity.
     * @param {string} query
     * @param {number} [topK=5] - Maximum number of results.
     * @returns {Promise<Array<{docId: string, chunkIndex: number, similarity: number, text: string}>>}
     *   Best matches first.
     */
    async retrieveRelevantChunks(query, topK = 5) {
        if (this.embeddings.size === 0) return [];
        // Get query embedding
        const queryEmbedding = await this.getQueryEmbedding(query);
        // Calculate similarities
        const scores = [];
        for (const [docId, docEmbeddings] of this.embeddings) {
            const doc = this.documents.get(docId);
            // Skip embeddings whose document is gone instead of throwing.
            if (!doc || !docEmbeddings) continue;
            for (let i = 0; i < docEmbeddings.length; i++) {
                scores.push({
                    docId,
                    chunkIndex: i,
                    similarity: this.cosineSimilarity(queryEmbedding, docEmbeddings[i]),
                    text: doc.chunks[i]
                });
            }
        }
        // Sort by similarity and return top K
        return scores
            .sort((a, b) => b.similarity - a.similarity)
            .slice(0, topK);
    }

    /**
     * Requests an embedding for the query and waits for the worker reply that
     * carries the same request id.
     * @param {string} query
     * @returns {Promise<Array<number>>} Resolves with `payload.embedding`.
     *   Rejects only if the worker sends an 'error' message carrying this
     *   request's `messageId` (NOTE(review): confirm the worker does so;
     *   otherwise a failed request stays pending).
     */
    getQueryEmbedding(query) {
        return new Promise((resolve, reject) => {
            // Timestamp alone can repeat within a millisecond; add a counter.
            this.queryCounter = (this.queryCounter || 0) + 1;
            const messageId = `embedding-${Date.now()}-${this.queryCounter}`;
            const handleResponse = (e) => {
                const { type, payload } = e.data || {};
                if (!payload || payload.messageId !== messageId) return;
                if (type === 'queryEmbeddingComplete') {
                    this.worker.removeEventListener('message', handleResponse);
                    resolve(payload.embedding);
                } else if (type === 'error') {
                    this.worker.removeEventListener('message', handleResponse);
                    const detail = payload.error && payload.error.message
                        ? payload.error.message
                        : String(payload.error);
                    reject(new Error(detail));
                }
            };
            this.worker.addEventListener('message', handleResponse);
            this.worker.postMessage({
                type: 'generateQueryEmbedding',
                payload: { query, messageId }
            });
        });
    }

    /**
     * Cosine similarity of two numeric vectors.
     * @param {ArrayLike<number>} a
     * @param {ArrayLike<number>} b
     * @returns {number} The similarity, or 0 when either vector is missing,
     *   the lengths differ, a magnitude is zero, or the result is not a number.
     */
    cosineSimilarity(a, b) {
        if (!a || !b || a.length !== b.length) return 0;
        let dotProduct = 0;
        let squaredA = 0;
        let squaredB = 0;
        for (let i = 0; i < a.length; i++) {
            dotProduct += a[i] * b[i];
            squaredA += a[i] * a[i];
            squaredB += b[i] * b[i];
        }
        const denominator = Math.sqrt(squaredA) * Math.sqrt(squaredB);
        // A zero or NaN denominator is falsy, so callers never sort on NaN.
        return denominator ? dotProduct / denominator : 0;
    }

    /**
     * Ranks axioms by how many distinct query keywords (longer than three
     * characters) occur in their text. Matching is case-insensitive and
     * ignores punctuation.
     * @param {string} query
     * @param {number} [topK=10] - Maximum number of results.
     * @returns {Array<Object>} Copies of matching axioms with an added
     *   `relevance` count, highest first.
     */
    getRelevantAxioms(query, topK = 10) {
        // Split on anything that is not a letter or digit so "gravity?" and
        // "gravity," both become "gravity".
        const tokenize = (value) => String(value == null ? '' : value)
            .toLowerCase()
            .split(/[^\p{L}\p{N}]+/u)
            .filter(Boolean);
        const queryWords = [...new Set(tokenize(query).filter((w) => w.length > 3))];
        if (queryWords.length === 0) return [];
        return this.axioms
            .map((axiom) => {
                const axiomWords = new Set(tokenize(axiom && axiom.text));
                const matches = queryWords.filter((w) => axiomWords.has(w)).length;
                return { ...axiom, relevance: matches };
            })
            .filter((a) => a.relevance > 0)
            .sort((a, b) => b.relevance - a.relevance)
            .slice(0, topK);
    }

    /**
     * Renders the retrieved chunks with their similarity, or an empty state.
     * @param {Array<{similarity: number, text: string}>} chunks
     */
    updateRetrievedContext(chunks) {
        const contextDiv = document.getElementById('retrievedContext');
        if (chunks.length === 0) {
            contextDiv.innerHTML = '<p class="empty-state">No relevant context found.</p>';
            return;
        }
        // Chunk text comes from uploaded files, so it is escaped before it is
        // placed into markup.
        contextDiv.innerHTML = chunks.map((chunk, i) => `
            <div class="context-item">
                <div class="context-header">
                    <strong>Chunk ${i + 1}</strong>
                    <span class="similarity">Similarity: ${(chunk.similarity * 100).toFixed(1)}%</span>
                </div>
                <p class="context-text">${this.escapeHtml(chunk.text)}</p>
            </div>
        `).join('');
    }

    /**
     * Restores the generate button and shows the generated response.
     * @param {{response: string}} payload
     */
    handleGenerationComplete({ response }) {
        const generateBtn = document.getElementById('generateBtn');
        const statusIndicator = document.getElementById('generationStatusIndicator');
        generateBtn.disabled = false;
        generateBtn.textContent = 'Generate Response';
        statusIndicator.innerHTML = '';
        document.getElementById('responseText').textContent = response;
        document.getElementById('generationResult').classList.remove('hidden');
    }

    /**
     * Renders the stored documents with a Remove button each, or an empty state.
     */
    updateDocumentList() {
        const listContainer = document.getElementById('documentList');
        if (this.documents.size === 0) {
            listContainer.innerHTML = '<p class="empty-state">No documents indexed yet. Upload files to begin.</p>';
            return;
        }
        // File names and contents are user-supplied: escape everything, and
        // carry the id in a data attribute rather than an inline handler.
        listContainer.innerHTML = Array.from(this.documents.values()).map((doc) => `
            <div class="document-item">
                <div class="doc-header">
                    <strong>${this.escapeHtml(doc.name)}</strong>
                    <button class="remove-btn" data-doc-id="${this.escapeHtml(doc.id)}">Remove</button>
                </div>
                <div class="doc-info">
                    <span>${(doc.size / 1024).toFixed(1)} KB</span>
                    <span>${doc.chunks.length} chunks</span>
                </div>
                <div class="doc-preview">${this.escapeHtml(String(doc.content).substring(0, 150))}...</div>
            </div>
        `).join('');
        // The buttons are recreated on every render, so listeners do not pile up.
        listContainer.querySelectorAll('.remove-btn').forEach((button) => {
            button.addEventListener('click', () => {
                this.removeDocument(button.getAttribute('data-doc-id'));
            });
        });
    }

    /**
     * Renders the extracted axioms, or an empty state.
     */
    updateAxiomList() {
        const listContainer = document.getElementById('axiomList');
        if (this.axioms.length === 0) {
            listContainer.innerHTML = '<p class="empty-state">No axioms extracted yet. Upload and index documents to extract axioms.</p>';
            return;
        }
        // Axiom text is derived from uploaded documents, so it is escaped.
        listContainer.innerHTML = this.axioms.map((axiom, i) => `
            <div class="axiom-item">
                <div class="axiom-header">
                    <strong>Axiom ${i + 1}</strong>
                    <span class="source">from: ${this.escapeHtml(axiom.source)}</span>
                </div>
                <p class="axiom-text">${this.escapeHtml(axiom.text)}</p>
            </div>
        `).join('');
    }

    /**
     * Removes a document, its embeddings and every axiom whose `docId`
     * matches, then re-renders and persists.
     * @param {string} docId
     */
    removeDocument(docId) {
        this.documents.delete(docId);
        this.embeddings.delete(docId);
        this.axioms = this.axioms.filter((a) => a.docId !== docId);
        this.updateDocumentList();
        this.updateAxiomList();
        this.saveData();
        this.showNotification('Document removed successfully.', 'success');
    }

    /**
     * After user confirmation, wipes all documents, embeddings and axioms from
     * memory and from localStorage.
     */
    clearAllData() {
        if (!confirm('Are you sure you want to clear all indexed documents and axioms? This cannot be undone.')) {
            return;
        }
        this.documents.clear();
        this.embeddings.clear();
        this.axioms = [];
        localStorage.removeItem('rag-documents');
        localStorage.removeItem('rag-axioms');
        this.updateDocumentList();
        this.updateAxiomList();
        this.showNotification('All data cleared.', 'success');
    }

    /**
     * Downloads the current axioms as a dated JSON file.
     */
    exportAxioms() {
        if (this.axioms.length === 0) {
            this.showNotification('No axioms to export.', 'warning');
            return;
        }
        const exportData = {
            exportedAt: new Date().toISOString(),
            axioms: this.axioms,
            documentCount: this.documents.size
        };
        const blob = new Blob([JSON.stringify(exportData, null, 2)], { type: 'application/json' });
        const url = URL.createObjectURL(blob);
        const link = document.createElement('a');
        link.href = url;
        link.download = `axioms-${new Date().toISOString().split('T')[0]}.json`;
        link.click();
        // Revoke on a later tick so the download has started before the URL
        // becomes invalid.
        setTimeout(() => URL.revokeObjectURL(url), 0);
        this.showNotification('Axioms exported successfully.', 'success');
    }

    /**
     * Persists documents and axioms to localStorage. Embeddings are not saved.
     * Storage failures (for example an exceeded quota) are logged and reported
     * instead of propagating to the caller.
     */
    saveData() {
        try {
            const docsData = Array.from(this.documents.entries());
            localStorage.setItem('rag-documents', JSON.stringify(docsData));
            localStorage.setItem('rag-axioms', JSON.stringify(this.axioms));
        } catch (error) {
            console.error('Error saving data:', error);
            this.showNotification('Could not save data locally (storage may be full).', 'warning');
        }
    }

    /**
     * Restores documents and axioms from localStorage and re-renders both
     * lists. Stored values that are not arrays are ignored; parse errors are
     * logged.
     */
    loadSavedData() {
        try {
            // Load documents
            const docsData = localStorage.getItem('rag-documents');
            if (docsData) {
                const docsArray = JSON.parse(docsData);
                if (Array.isArray(docsArray)) {
                    this.documents = new Map(docsArray);
                    this.updateDocumentList();
                }
            }
            // Load axioms
            const axiomsData = localStorage.getItem('rag-axioms');
            if (axiomsData) {
                const savedAxioms = JSON.parse(axiomsData);
                if (Array.isArray(savedAxioms)) {
                    this.axioms = savedAxioms;
                    this.updateAxiomList();
                }
            }
        } catch (error) {
            console.error('Error loading saved data:', error);
        }
    }

    /**
     * Enables the generate button and announces that the models are ready.
     */
    updateUI() {
        // Enable generate button when models are ready
        document.getElementById('generateBtn').disabled = false;
        this.showNotification('Models loaded successfully! Ready to process documents.', 'success');
    }

    /**
     * Logs an error, notifies the user and resets the generation UI.
     * @param {Error|string|undefined} error - An Error-like object or a plain
     *   message string; anything without a message shows 'Unknown error'.
     */
    handleError(error) {
        console.error('Application error:', error);
        const detail = typeof error === 'string' ? error : (error && error.message);
        this.showNotification(`Error: ${detail || 'Unknown error'}`, 'error');
        // Reset UI state
        const generateBtn = document.getElementById('generateBtn');
        generateBtn.disabled = false;
        generateBtn.textContent = 'Generate Response';
        document.getElementById('generationStatusIndicator').innerHTML = '';
    }

    /**
     * Shows a transient notification that is removed after about 3.3 seconds.
     * @param {string} message
     * @param {string} [type='info'] - Added as a CSS class next to 'notification'.
     */
    showNotification(message, type = 'info') {
        const notification = document.createElement('div');
        notification.className = `notification ${type}`;
        notification.textContent = message;
        document.body.appendChild(notification);
        // Add 'show' shortly after insertion, then remove it and the element.
        setTimeout(() => {
            notification.classList.add('show');
        }, 10);
        setTimeout(() => {
            notification.classList.remove('show');
            setTimeout(() => notification.remove(), 300);
        }, 3000);
    }

    /**
     * Escapes the characters that are significant in HTML text and quoted
     * attribute values.
     * @param {*} value - Converted to a string; null/undefined become ''.
     * @returns {string}
     */
    escapeHtml(value) {
        const replacements = {
            '&': '&amp;',
            '<': '&lt;',
            '>': '&gt;',
            '"': '&quot;',
            "'": '&#39;'
        };
        return String(value == null ? '' : value).replace(/[&<>"']/g, (ch) => replacements[ch]);
    }
}
// Initialize application. Constructing RAGApplication immediately spawns the
// worker and looks up page elements by id, so this script has to run after
// that markup exists.
const app = new RAGApplication();