Spaces:
Configuration error
Configuration error
// In-memory document registry: a Map from document id to its stored record.
// Nothing here is persisted. A production deployment would typically replace
// this with a vector database such as Pinecone or Weaviate.
const documents = new Map();
/**
 * Splits text into fixed-size windows that overlap their neighbours.
 *
 * Each chunk records the slice of text it covers together with its
 * [start, end) character offsets into the original string. Consecutive chunks
 * begin `chunkSize - overlap` characters apart.
 *
 * @param {string} text - Text to split. An empty string yields no chunks.
 * @param {number} [chunkSize=1000] - Maximum characters per chunk; must be > 0.
 * @param {number} [overlap=200] - Characters shared between consecutive
 *   chunks; must satisfy 0 <= overlap < chunkSize.
 * @returns {Array<{text: string, start: number, end: number}>} Chunks in
 *   document order.
 * @throws {RangeError} If chunkSize or overlap is out of range.
 */
function chunkText(text, chunkSize = 1000, overlap = 200) {
  // Written as negated comparisons so NaN is rejected as well.
  if (!(chunkSize > 0)) {
    throw new RangeError(`chunkSize must be a positive number, got ${chunkSize}`);
  }
  // A step of zero or less would never advance `start` and loop forever.
  if (!(overlap >= 0 && overlap < chunkSize)) {
    throw new RangeError(
      `overlap must be >= 0 and smaller than chunkSize (${chunkSize}), got ${overlap}`
    );
  }

  const chunks = [];
  const step = chunkSize - overlap;
  let start = 0;
  while (start < text.length) {
    const end = Math.min(start + chunkSize, text.length);
    chunks.push({
      text: text.slice(start, end),
      start,
      end,
    });
    // Once a chunk reaches the end of the text, any further chunk would lie
    // entirely inside this one's overlap region, so stop here.
    if (end === text.length) {
      break;
    }
    start += step;
  }
  return chunks;
}
/**
 * Chunks a document's text and stores it in the registry under `id`,
 * replacing any record already stored under that id.
 *
 * @param {string} id - Key the document is stored under.
 * @param {string} filename - Name recorded alongside the document.
 * @param {string} text - Full document text.
 * @returns {{id: string, filename: string, chunkCount: number, charCount: number}}
 *   Summary of what was stored.
 */
function addDocument(id, filename, text) {
  const pieces = chunkText(text);
  const charCount = text.length;
  const chunkCount = pieces.length;

  const record = {
    id,
    filename,
    fullText: text,
    chunks: pieces,
    addedAt: new Date().toISOString(),
    charCount,
    chunkCount,
  };
  documents.set(id, record);

  return { id, filename, chunkCount, charCount };
}
/**
 * Looks up the stored record for a document id.
 *
 * @param {string} id - Document id to look up.
 * @returns {object|undefined} The stored record, or undefined when the id is
 *   not in the registry.
 */
function getDocument(id) {
  const record = documents.get(id);
  return record;
}
/**
 * Lists summary metadata for every stored document, in registry order.
 * The full text and the chunks are left out of each summary.
 *
 * @returns {Array<{id: string, filename: string, charCount: number,
 *   chunkCount: number, addedAt: string}>} One summary per stored document.
 */
function getAllDocuments() {
  const summaries = [];
  for (const { id, filename, charCount, chunkCount, addedAt } of documents.values()) {
    summaries.push({ id, filename, charCount, chunkCount, addedAt });
  }
  return summaries;
}
/**
 * Deletes a document from the registry.
 *
 * @param {string} id - Document id to delete.
 * @returns {boolean} true when a record existed and was removed, false when
 *   the id was not present.
 */
function removeDocument(id) {
  const wasRemoved = documents.delete(id);
  return wasRemoved;
}
/**
 * Ranks stored chunks by how often the query's keywords occur in them.
 *
 * This is a simple keyword search (a production system would use embeddings
 * and vector similarity). The query is lower-cased and split on whitespace;
 * terms of two characters or fewer are ignored. A chunk's score is the total
 * number of non-overlapping, case-insensitive occurrences of the remaining
 * terms, each matched as literal text. Chunks scoring zero are dropped.
 *
 * @param {string} query - Free-text query. A non-string query, or one with no
 *   usable terms, yields an empty result.
 * @param {string[]} [docIds] - Restrict the search to these document ids.
 *   When omitted or empty, every stored document is searched; unknown ids are
 *   skipped.
 * @param {number} [limit=10] - Maximum number of results to return.
 * @returns {Array<{docId: string, filename: string, text: string, score: number}>}
 *   Matching chunks, highest score first.
 */
function searchChunks(query, docIds, limit = 10) {
  if (typeof query !== 'string') {
    return [];
  }

  // Escape regex metacharacters so terms like "c++" or "(draft" are matched
  // literally instead of throwing a SyntaxError or being read as a pattern.
  // Patterns are compiled once here rather than once per chunk.
  const termPatterns = query
    .toLowerCase()
    .split(/\s+/)
    .filter(term => term.length > 2)
    .map(term => new RegExp(term.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'), 'gi'));
  if (termPatterns.length === 0) {
    return [];
  }

  const targetDocs = docIds && docIds.length > 0
    ? docIds.map(id => documents.get(id)).filter(Boolean)
    : Array.from(documents.values());

  const results = [];
  for (const doc of targetDocs) {
    for (const chunk of doc.chunks) {
      const lowerChunk = chunk.text.toLowerCase();
      let score = 0;
      for (const pattern of termPatterns) {
        // String.prototype.match restarts a global regex from index 0, so the
        // shared pattern objects are safe to reuse across chunks.
        score += (lowerChunk.match(pattern) || []).length;
      }
      if (score > 0) {
        results.push({
          docId: doc.id,
          filename: doc.filename,
          text: chunk.text,
          score,
        });
      }
    }
  }

  results.sort((a, b) => b.score - a.score);
  return results.slice(0, limit);
}
| module.exports = { addDocument, getDocument, getAllDocuments, removeDocument, searchChunks }; | |