ldostadi committed on
Commit
cffdb2f
·
verified ·
1 Parent(s): b2ba210

Upload index.js with huggingface_hub

Browse files
Files changed (1) hide show
  1. index.js +527 -0
index.js ADDED
@@ -0,0 +1,527 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /**
2
+ * RAG Axiom Extractor - Main Application Logic
3
+ * Handles UI, document processing, and worker communication
4
+ */
5
+
6
/**
 * Main-thread controller for the RAG Axiom Extractor page.
 *
 * Owns the web worker (`worker.js`), the in-memory document / embedding /
 * axiom stores, their localStorage persistence, and all DOM updates.
 * Everything that reaches the DOM through `innerHTML` is HTML-escaped first,
 * because file names, file contents and extracted axioms are user-supplied.
 */
class RAGApplication {
    /** Creates empty state containers and immediately runs {@link RAGApplication#init}. */
    constructor() {
        this.worker = null;
        // docId -> { id, name, size, content, chunks, uploadedAt }
        this.documents = new Map();
        this.axioms = [];
        // docId -> per-chunk embeddings, as delivered by the worker's `embeddingComplete` message
        this.embeddings = new Map();
        this.isInitialized = false;
        // Monotonic counter used to build collision-free document / request ids.
        this.requestCounter = 0;
        // Documents restored from localStorage that are being re-embedded after a reload.
        this.reindexingDocIds = new Set();

        this.init();
    }

    /** Starts the worker, wires worker and UI handlers, then restores persisted data. */
    init() {
        this.worker = new Worker('worker.js');

        this.worker.onmessage = (e) => this.handleWorkerMessage(e);
        this.worker.onerror = (e) => this.handleWorkerError(e);

        this.setupEventListeners();
        this.loadSavedData();
    }

    /** Attaches all DOM listeners (upload, drag and drop, generation, actions, document removal). */
    setupEventListeners() {
        const fileInput = document.getElementById('fileInput');
        const uploadArea = document.getElementById('uploadArea');

        fileInput.addEventListener('change', (e) => this.handleFileUpload(e.target.files));

        uploadArea.addEventListener('dragover', (e) => {
            e.preventDefault();
            uploadArea.classList.add('dragover');
        });

        uploadArea.addEventListener('dragleave', () => {
            uploadArea.classList.remove('dragover');
        });

        uploadArea.addEventListener('drop', (e) => {
            e.preventDefault();
            uploadArea.classList.remove('dragover');
            this.handleFileUpload(e.dataTransfer.files);
        });

        document.getElementById('generateBtn').addEventListener('click', () => this.generateResponse());
        // `keydown` instead of the deprecated `keypress`, which does not fire
        // reliably for Ctrl+Enter; Cmd+Enter is accepted as well.
        document.getElementById('queryInput').addEventListener('keydown', (e) => {
            if (e.key === 'Enter' && (e.ctrlKey || e.metaKey)) {
                e.preventDefault();
                this.generateResponse();
            }
        });

        document.getElementById('clearAllBtn').addEventListener('click', () => this.clearAllData());
        document.getElementById('exportBtn').addEventListener('click', () => this.exportAxioms());

        // Delegated handler for the per-document "Remove" buttons. This replaces
        // inline `onclick` attributes, which broke on ids containing quotes and
        // required a global `app` variable.
        document.getElementById('documentList').addEventListener('click', (e) => {
            const removeBtn = e.target.closest('.remove-btn');
            if (removeBtn && removeBtn.dataset.docId !== undefined) {
                this.removeDocument(removeBtn.dataset.docId);
            }
        });
    }

    /**
     * Dispatches a message received from the worker to the matching handler.
     * Message types not listed here (e.g. `queryEmbeddingComplete`) are handled
     * by per-request listeners and are ignored by this method.
     * @param {MessageEvent} e - event whose `data` is `{ type, payload }`
     */
    handleWorkerMessage(e) {
        const { type, payload } = e.data ?? {};

        switch (type) {
            case 'progress':
                this.updateProgress(payload);
                break;

            case 'status':
                this.updateStatus(payload.model, payload.status);
                break;

            case 'ready':
                this.isInitialized = true;
                this.updateUI();
                this.reindexRestoredDocuments();
                break;

            case 'embeddingComplete':
                this.handleEmbeddingComplete(payload);
                break;

            case 'axiomExtractionComplete':
                this.handleAxiomExtractionComplete(payload);
                break;

            case 'generationComplete':
                this.handleGenerationComplete(payload);
                break;

            case 'error':
                this.handleError(payload?.error ?? payload);
                break;

            default:
                break;
        }
    }

    /**
     * Handles an `error` event raised by the worker itself.
     * @param {ErrorEvent} error
     */
    handleWorkerError(error) {
        console.error('Worker error:', error);
        this.showNotification('Worker error occurred. Check console for details.', 'error');
    }

    /**
     * Shows download progress. `progress` is treated as a 0..1 fraction (it is
     * multiplied by 100); the size suffix is only shown when both byte counts
     * are finite numbers, so a missing count no longer renders "NaNMB".
     * @param {{progress: number, loaded?: number, total?: number}} param0
     */
    updateProgress({ progress, loaded, total }) {
        const progressContainer = document.getElementById('progressContainer');
        const progressBar = document.getElementById('progressBar');
        const progressText = document.getElementById('progressText');

        const percent = (Number(progress) || 0) * 100;
        const toMB = (bytes) => (bytes / 1024 / 1024).toFixed(1);
        const hasSizes = Number.isFinite(loaded) && Number.isFinite(total);

        progressContainer.classList.remove('hidden');
        progressBar.style.width = `${percent}%`;
        progressText.textContent = hasSizes
            ? `${Math.round(percent)}% (${toMB(loaded)}MB / ${toMB(total)}MB)`
            : `${Math.round(percent)}%`;
    }

    /**
     * Writes a model status line. The element is `embeddingStatus` when
     * `model === 'embedding'`, otherwise `generationStatus`.
     * @param {string} model
     * @param {string} status - a status containing '✅' is styled as ready
     */
    updateStatus(model, status) {
        const statusElement = model === 'embedding'
            ? document.getElementById('embeddingStatus')
            : document.getElementById('generationStatus');
        const label = status == null ? '' : String(status);

        statusElement.textContent = label;
        statusElement.className = `status ${label.includes('✅') ? 'ready' : 'pending'}`;
    }

    /**
     * Reads each file as text, stores it as a chunked document and asks the
     * worker to embed the chunks. Files that fail are reported individually
     * and do not stop the remaining files.
     * @param {FileList|File[]|null|undefined} files
     */
    async handleFileUpload(files) {
        if (!this.isInitialized) {
            this.showNotification('Please wait for models to finish loading.', 'warning');
            return;
        }

        // Snapshot synchronously: a FileList taken from an <input> is live.
        const pendingFiles = Array.from(files ?? []);
        if (pendingFiles.length === 0) {
            return;
        }

        const uploadStatus = document.getElementById('uploadStatus');
        uploadStatus.innerHTML = '<span class="loading">Processing files...</span>';

        let processedCount = 0;
        for (const file of pendingFiles) {
            try {
                const text = await this.readFile(file);
                const chunks = this.chunkText(text);
                // The counter keeps ids unique even when two same-named files
                // are stored within the same millisecond.
                this.requestCounter += 1;
                const docId = `${file.name}-${Date.now()}-${this.requestCounter}`;

                this.documents.set(docId, {
                    id: docId,
                    name: file.name,
                    size: file.size,
                    content: text,
                    chunks,
                    uploadedAt: new Date().toISOString(),
                });

                this.worker.postMessage({
                    type: 'generateEmbeddings',
                    payload: { docId, chunks },
                });
                processedCount += 1;
            } catch (error) {
                console.error(`Error processing ${file.name}:`, error);
                this.showNotification(`Error processing ${file.name}`, 'error');
            }
        }

        uploadStatus.innerHTML = processedCount > 0
            ? '<span class="success">Files processed! Generating embeddings...</span>'
            : '<span class="error">No files could be processed.</span>';
        setTimeout(() => {
            uploadStatus.innerHTML = '';
        }, 3000);
        this.saveData();
    }

    /**
     * Reads a file as text.
     * @param {File} file
     * @returns {Promise<string>} resolves with the file's text; rejects with the reader's error
     */
    readFile(file) {
        return new Promise((resolve, reject) => {
            const reader = new FileReader();
            reader.onload = () => resolve(reader.result);
            reader.onerror = () => reject(reader.error ?? new Error(`Could not read ${file.name}`));
            reader.readAsText(file);
        });
    }

    /**
     * Splits text into overlapping word windows. A window starts every
     * `chunkSize - overlap` words and holds up to `chunkSize` words; windows
     * whose joined text is 50 characters or shorter are dropped.
     * @param {string} text
     * @param {number} [chunkSize=200] - words per chunk
     * @param {number} [overlap=50] - words shared by consecutive chunks
     * @returns {string[]}
     * @throws {RangeError} when `chunkSize - overlap` is not positive (the
     *   window would never advance, which previously looped forever)
     */
    chunkText(text, chunkSize = 200, overlap = 50) {
        const step = chunkSize - overlap;
        if (!(step > 0)) {
            throw new RangeError('chunkText: overlap must be smaller than chunkSize');
        }

        // Drop empty tokens produced by leading/trailing whitespace.
        const words = String(text ?? '').split(/\s+/).filter(Boolean);
        const chunks = [];

        for (let start = 0; start < words.length; start += step) {
            const chunk = words.slice(start, start + chunkSize).join(' ');
            if (chunk.length > 50) {
                chunks.push(chunk);
            }
        }

        return chunks;
    }

    /**
     * Requests embeddings for documents that were restored from localStorage.
     * Embeddings are not persisted, so without this step restored documents
     * are listed but can never be retrieved as context.
     */
    reindexRestoredDocuments() {
        for (const [docId, doc] of this.documents) {
            const alreadyHandled = this.embeddings.has(docId) || this.reindexingDocIds.has(docId);
            if (alreadyHandled || !Array.isArray(doc?.chunks)) {
                continue;
            }
            this.reindexingDocIds.add(docId);
            this.worker.postMessage({
                type: 'generateEmbeddings',
                payload: { docId, chunks: doc.chunks },
            });
        }
    }

    /**
     * Stores embeddings delivered by the worker and, for newly uploaded
     * documents, starts axiom extraction.
     * @param {{docId: string, embeddings: Array}} param0
     */
    handleEmbeddingComplete({ docId, embeddings }) {
        const wasReindex = this.reindexingDocIds.delete(docId);

        // The document may have been removed while the worker was busy;
        // keeping its embeddings would break retrieval later.
        if (!this.documents.has(docId)) {
            return;
        }

        this.embeddings.set(docId, embeddings);
        // Restored documents already have their axioms in `this.axioms`;
        // extracting again would duplicate them on every page load.
        if (!wasReindex) {
            this.extractAxioms(docId);
        }
        this.updateDocumentList();
    }

    /**
     * Asks the worker to extract axioms from a stored document's full text.
     * Does nothing when the document is unknown.
     * @param {string} docId
     */
    extractAxioms(docId) {
        const doc = this.documents.get(docId);
        if (!doc) {
            return;
        }

        this.worker.postMessage({
            type: 'extractAxioms',
            payload: { docId, text: doc.content },
        });
    }

    /**
     * Appends extracted axioms, re-renders the axiom list and persists.
     * @param {{axioms: Array<object>}} param0
     */
    handleAxiomExtractionComplete({ axioms }) {
        this.axioms.push(...(axioms ?? []));
        this.updateAxiomList();
        this.saveData();
    }

    /**
     * Builds the prompt context (retrieved chunks and/or matching axioms) for
     * the query in `#queryInput` and sends a `generate` request to the worker.
     * Any failure while preparing the request restores the Generate button.
     */
    async generateResponse() {
        const query = document.getElementById('queryInput').value.trim();
        if (!query) {
            this.showNotification('Please enter a query.', 'warning');
            return;
        }

        if (!this.isInitialized) {
            this.showNotification('Models are still loading.', 'warning');
            return;
        }

        const generateBtn = document.getElementById('generateBtn');
        // The keyboard shortcut bypasses the disabled button; don't start a
        // second request while one is in flight.
        if (generateBtn.disabled) {
            return;
        }

        const useAxioms = document.getElementById('useAxioms').checked;
        const useContext = document.getElementById('useContext').checked;
        const statusIndicator = document.getElementById('generationStatusIndicator');

        generateBtn.disabled = true;
        generateBtn.textContent = 'Generating...';
        statusIndicator.innerHTML = '<span class="loading">Retrieving relevant context...</span>';

        try {
            let context = '';
            let retrievedChunks = [];

            if (useContext && this.embeddings.size > 0) {
                retrievedChunks = await this.retrieveRelevantChunks(query, 5);
                context = retrievedChunks.map((c) => c.text).join('\n\n');
            }

            if (useAxioms && this.axioms.length > 0) {
                const relevantAxioms = this.getRelevantAxioms(query);
                if (relevantAxioms.length > 0) {
                    context += '\n\n=== EXTRACTED AXIOMS ===\n';
                    context += relevantAxioms.map((a) => `• ${a.text}`).join('\n');
                }
            }

            this.updateRetrievedContext(retrievedChunks);

            statusIndicator.innerHTML = '<span class="loading">Generating response...</span>';

            this.worker.postMessage({
                type: 'generate',
                payload: { query, context, maxLength: 512 },
            });
        } catch (error) {
            if (error?.reportedToUser) {
                // Worker failures were already shown by the worker handlers.
                this.resetGenerationUI();
            } else {
                this.handleError(error);
            }
        }
    }

    /**
     * Scores every stored chunk against the query embedding.
     * @param {string} query
     * @param {number} [topK=5]
     * @returns {Promise<Array<{docId: string, chunkIndex: number, similarity: number, text: string}>>}
     *   the `topK` highest-scoring chunks, best first
     */
    async retrieveRelevantChunks(query, topK = 5) {
        if (this.embeddings.size === 0) {
            return [];
        }

        const queryEmbedding = await this.getQueryEmbedding(query);

        const scores = [];
        for (const [docId, docEmbeddings] of this.embeddings) {
            const doc = this.documents.get(docId);
            if (!doc) {
                continue; // embeddings without a document cannot supply text
            }
            const count = docEmbeddings?.length ?? 0;
            for (let i = 0; i < count; i++) {
                const text = doc.chunks?.[i];
                if (text === undefined) {
                    continue;
                }
                scores.push({
                    docId,
                    chunkIndex: i,
                    similarity: this.cosineSimilarity(queryEmbedding, docEmbeddings[i]),
                    text,
                });
            }
        }

        return scores
            .sort((a, b) => b.similarity - a.similarity)
            .slice(0, topK);
    }

    /**
     * Asks the worker for the embedding of a query and waits for the matching
     * `queryEmbeddingComplete` message.
     * @param {string} query
     * @returns {Promise<Array<number>>} rejects when the worker reports an error
     *   (an `error` message without a different `messageId`, or an `error`
     *   event) while the request is pending
     */
    getQueryEmbedding(query) {
        return new Promise((resolve, reject) => {
            // Date.now() alone can repeat within a millisecond.
            this.requestCounter = (this.requestCounter ?? 0) + 1;
            const messageId = `embedding-${Date.now()}-${this.requestCounter}`;

            const cleanup = () => {
                this.worker.removeEventListener('message', handleResponse);
                this.worker.removeEventListener('error', handleFailure);
            };

            const fail = (detail) => {
                cleanup();
                const failure = new Error(`Query embedding failed: ${detail || 'Unknown error'}`);
                // The regular worker handlers already notify the user.
                failure.reportedToUser = true;
                reject(failure);
            };

            const handleResponse = (e) => {
                const { type, payload } = e.data ?? {};
                if (type === 'queryEmbeddingComplete' && payload?.messageId === messageId) {
                    cleanup();
                    resolve(payload.embedding);
                } else if (type === 'error'
                    && (payload?.messageId === undefined || payload.messageId === messageId)) {
                    const reported = payload?.error ?? payload;
                    fail(typeof reported === 'string' ? reported : reported?.message);
                }
            };

            const handleFailure = (e) => fail(e?.message);

            this.worker.addEventListener('message', handleResponse);
            this.worker.addEventListener('error', handleFailure);

            this.worker.postMessage({
                type: 'generateQueryEmbedding',
                payload: { query, messageId },
            });
        });
    }

    /**
     * Cosine similarity of two numeric vectors.
     * @param {ArrayLike<number>} a
     * @param {ArrayLike<number>} b
     * @returns {number} 0 when either vector is missing, empty, zero-length in
     *   magnitude, or when the lengths differ (which previously produced NaN
     *   and corrupted the ranking sort)
     */
    cosineSimilarity(a, b) {
        if (!a || !b || a.length !== b.length || a.length === 0) {
            return 0;
        }

        let dotProduct = 0;
        let sumSquaresA = 0;
        let sumSquaresB = 0;
        for (let i = 0; i < a.length; i++) {
            dotProduct += a[i] * b[i];
            sumSquaresA += a[i] * a[i];
            sumSquaresB += b[i] * b[i];
        }

        const denominator = Math.sqrt(sumSquaresA) * Math.sqrt(sumSquaresB);
        const similarity = denominator > 0 ? dotProduct / denominator : 0;
        return Number.isFinite(similarity) ? similarity : 0;
    }

    /**
     * Ranks axioms by how many query words (longer than 3 characters,
     * lower-cased, whitespace-separated) occur among the axiom's words.
     * @param {string} query
     * @param {number} [topK=10]
     * @returns {Array<object>} copies of matching axioms with a `relevance` count, best first
     */
    getRelevantAxioms(query, topK = 10) {
        const queryWords = query.toLowerCase().split(/\s+/).filter((w) => w.length > 3);

        return this.axioms
            .map((axiom) => {
                // Set lookup instead of scanning the word array once per query word.
                const axiomWords = new Set(String(axiom.text ?? '').toLowerCase().split(/\s+/));
                const matches = queryWords.filter((w) => axiomWords.has(w)).length;
                return { ...axiom, relevance: matches };
            })
            .filter((a) => a.relevance > 0)
            .sort((a, b) => b.relevance - a.relevance)
            .slice(0, topK);
    }

    /**
     * Escapes the five HTML-significant characters so a value can be placed
     * in element content or a double-quoted attribute.
     * @param {*} value - null/undefined become an empty string
     * @returns {string}
     */
    escapeHtml(value) {
        const replacements = {
            '&': '&amp;',
            '<': '&lt;',
            '>': '&gt;',
            '"': '&quot;',
            "'": '&#39;',
        };
        return String(value ?? '').replace(/[&<>"']/g, (ch) => replacements[ch]);
    }

    /**
     * Renders the chunks used as context for the current query.
     * @param {Array<{similarity: number, text: string}>} chunks
     */
    updateRetrievedContext(chunks) {
        const contextDiv = document.getElementById('retrievedContext');
        if (chunks.length === 0) {
            contextDiv.innerHTML = '<p class="empty-state">No relevant context found.</p>';
            return;
        }

        // Chunk text comes from uploaded files, so it is escaped.
        contextDiv.innerHTML = chunks.map((chunk, i) => `
            <div class="context-item">
                <div class="context-header">
                    <strong>Chunk ${i + 1}</strong>
                    <span class="similarity">Similarity: ${(chunk.similarity * 100).toFixed(1)}%</span>
                </div>
                <p class="context-text">${this.escapeHtml(chunk.text)}</p>
            </div>
        `).join('');
    }

    /** Re-enables the Generate button and clears the generation status line. */
    resetGenerationUI() {
        const generateBtn = document.getElementById('generateBtn');
        generateBtn.disabled = false;
        generateBtn.textContent = 'Generate Response';
        document.getElementById('generationStatusIndicator').innerHTML = '';
    }

    /**
     * Shows the generated response and restores the Generate button.
     * @param {{response: string}} param0
     */
    handleGenerationComplete({ response }) {
        this.resetGenerationUI();

        document.getElementById('responseText').textContent = response;
        document.getElementById('generationResult').classList.remove('hidden');
    }

    /**
     * Renders the list of stored documents. Names, ids and previews are
     * escaped; the Remove button carries the id in `data-doc-id` and is
     * handled by the delegated listener set up in setupEventListeners().
     */
    updateDocumentList() {
        const listContainer = document.getElementById('documentList');

        if (this.documents.size === 0) {
            listContainer.innerHTML = '<p class="empty-state">No documents indexed yet. Upload files to begin.</p>';
            return;
        }

        listContainer.innerHTML = Array.from(this.documents.values()).map((doc) => `
            <div class="document-item">
                <div class="doc-header">
                    <strong>${this.escapeHtml(doc.name)}</strong>
                    <button class="remove-btn" data-doc-id="${this.escapeHtml(doc.id)}">Remove</button>
                </div>
                <div class="doc-info">
                    <span>${(doc.size / 1024).toFixed(1)} KB</span>
                    <span>${doc.chunks.length} chunks</span>
                </div>
                <div class="doc-preview">${this.escapeHtml(doc.content.substring(0, 150))}...</div>
            </div>
        `).join('');
    }

    /** Renders the list of extracted axioms with escaped source and text. */
    updateAxiomList() {
        const listContainer = document.getElementById('axiomList');

        if (this.axioms.length === 0) {
            listContainer.innerHTML = '<p class="empty-state">No axioms extracted yet. Upload and index documents to extract axioms.</p>';
            return;
        }

        listContainer.innerHTML = this.axioms.map((axiom, i) => `
            <div class="axiom-item">
                <div class="axiom-header">
                    <strong>Axiom ${i + 1}</strong>
                    <span class="source">from: ${this.escapeHtml(axiom.source)}</span>
                </div>
                <p class="axiom-text">${this.escapeHtml(axiom.text)}</p>
            </div>
        `).join('');
    }

    /**
     * Removes a document together with its embeddings and every axiom whose
     * `docId` matches, then re-renders and persists.
     * @param {string} docId
     */
    removeDocument(docId) {
        this.documents.delete(docId);
        this.embeddings.delete(docId);
        this.reindexingDocIds.delete(docId);
        this.axioms = this.axioms.filter((a) => a.docId !== docId);
        this.updateDocumentList();
        this.updateAxiomList();
        this.saveData();
        this.showNotification('Document removed successfully.', 'success');
    }

    /** After user confirmation, clears all in-memory and persisted data. */
    clearAllData() {
        if (confirm('Are you sure you want to clear all indexed documents and axioms? This cannot be undone.')) {
            this.documents.clear();
            this.embeddings.clear();
            this.reindexingDocIds.clear();
            this.axioms = [];
            localStorage.removeItem('rag-documents');
            localStorage.removeItem('rag-axioms');
            this.updateDocumentList();
            this.updateAxiomList();
            this.showNotification('All data cleared.', 'success');
        }
    }

    /** Downloads the current axioms as a dated JSON file. */
    exportAxioms() {
        if (this.axioms.length === 0) {
            this.showNotification('No axioms to export.', 'warning');
            return;
        }

        const exportData = {
            exportedAt: new Date().toISOString(),
            axioms: this.axioms,
            documentCount: this.documents.size,
        };

        const blob = new Blob([JSON.stringify(exportData, null, 2)], { type: 'application/json' });
        const url = URL.createObjectURL(blob);
        const link = document.createElement('a');
        link.href = url;
        link.download = `axioms-${new Date().toISOString().split('T')[0]}.json`;
        link.click();
        // Revoke on a later task so the download has been started first.
        setTimeout(() => URL.revokeObjectURL(url), 0);

        this.showNotification('Axioms exported successfully.', 'success');
    }

    /**
     * Persists documents and axioms to localStorage (embeddings are not
     * persisted). Storage failures such as an exceeded quota are reported
     * instead of escaping into the caller.
     * @returns {boolean} true when both entries were written
     */
    saveData() {
        try {
            const docsData = Array.from(this.documents.entries());
            localStorage.setItem('rag-documents', JSON.stringify(docsData));
            localStorage.setItem('rag-axioms', JSON.stringify(this.axioms));
            return true;
        } catch (error) {
            console.error('Error saving data:', error);
            this.showNotification('Could not save data to browser storage.', 'warning');
            return false;
        }
    }

    /** Restores documents and axioms from localStorage; malformed data is logged and ignored. */
    loadSavedData() {
        try {
            const docsData = localStorage.getItem('rag-documents');
            if (docsData) {
                const docsArray = JSON.parse(docsData);
                if (Array.isArray(docsArray)) {
                    this.documents = new Map(docsArray);
                    this.updateDocumentList();
                }
            }

            const axiomsData = localStorage.getItem('rag-axioms');
            if (axiomsData) {
                const savedAxioms = JSON.parse(axiomsData);
                if (Array.isArray(savedAxioms)) {
                    this.axioms = savedAxioms;
                    this.updateAxiomList();
                }
            }
        } catch (error) {
            console.error('Error loading saved data:', error);
        }
    }

    /** Enables the Generate button once the worker has reported `ready`. */
    updateUI() {
        document.getElementById('generateBtn').disabled = false;
        this.showNotification('Models loaded successfully! Ready to process documents.', 'success');
    }

    /**
     * Reports an application error and restores the generation controls.
     * @param {Error|string|{message?: string}|null|undefined} error
     */
    handleError(error) {
        console.error('Application error:', error);
        const message = typeof error === 'string' ? error : error?.message;
        this.showNotification(`Error: ${message || 'Unknown error'}`, 'error');

        this.resetGenerationUI();
    }

    /**
     * Shows a transient notification that is removed after about three seconds.
     * @param {string} message
     * @param {string} [type='info'] - added as a CSS class next to `notification`
     */
    showNotification(message, type = 'info') {
        const notification = document.createElement('div');
        notification.className = `notification ${type}`;
        notification.textContent = message;

        document.body.appendChild(notification);

        // Adding the class on a later task lets the CSS transition run.
        setTimeout(() => {
            notification.classList.add('show');
        }, 10);

        setTimeout(() => {
            notification.classList.remove('show');
            setTimeout(() => notification.remove(), 300);
        }, 3000);
    }
}
525
+
526
// Initialize application.
// The RAGApplication constructor immediately looks up page elements, so when
// this script is evaluated while the document is still being parsed,
// construction is deferred until DOMContentLoaded. `app` remains a
// script-level binding so existing references to it keep working.
let app;
if (document.readyState === 'loading') {
    document.addEventListener('DOMContentLoaded', () => {
        app = new RAGApplication();
    }, { once: true });
} else {
    app = new RAGApplication();
}