File size: 1,784 Bytes
0907913 d5790e2 0907913 d5790e2 0907913 d5790e2 0907913 d5790e2 0907913 d5790e2 0907913 d5790e2 0907913 d5790e2 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 |
# ==========================================================
# document_registry.py — Lightweight Registry for Uploaded Docs
# ==========================================================
class DocumentRegistry:
    """In-memory registry of uploaded documents, keyed by base file name.

    Each entry is a plain dict holding the document's name, chunk count,
    table-of-contents source label, and the chunk / embedding / index
    objects exactly as they were handed to ``register``.
    """

    def __init__(self):
        # Maps a document's base file name to its entry dict.
        self._registry = {}

    def register(self, file_path, chunks, embeddings, index, toc_source="unknown"):
        """Add a document to the registry and return its ID.

        The ID is the final path component of ``file_path``. Registering a
        second document with the same file name overwrites the earlier
        entry.

        Args:
            file_path (str): Path to the document; only its base name is kept.
            chunks (list): Text chunks extracted from the document. Must
                support ``len()``.
            embeddings (list): Vector embeddings for the chunks; stored as-is.
            index: Search index object for this document (described by the
                original author as a FAISS index); stored as-is.
            toc_source (str): Label for how the table of contents was
                detected (e.g. heuristic / ai_inferred).

        Returns:
            str: The document ID (the base file name).
        """
        import os

        doc_id = os.path.basename(file_path)
        chunk_total = len(chunks)

        # Insert-or-replace under the file name; key order matches the
        # original entry layout.
        self._registry[doc_id] = dict(
            name=doc_id,
            num_chunks=chunk_total,
            toc_source=toc_source,
            chunks=chunks,
            embeddings=embeddings,
            index=index,
        )

        print(f"π Registered {doc_id} ({len(chunks)} chunks)")
        return doc_id

    def list_docs(self):
        """Return a new list of every stored entry, in registration order.

        The list is fresh, but its elements are the registry's own entry
        dicts (full entries, not copies).
        """
        return [*self._registry.values()]

    def get_doc(self, name):
        """Return the entry stored under ``name``, or ``None`` if absent."""
        return self._registry.get(name, None)

    def clear(self):
        """Remove every entry from the registry in place."""
        self._registry.clear()
|