enterprise-knowledge-assistant / src /document_registry.py
Shubham170793's picture
Update src/document_registry.py
d5790e2 verified
# ==========================================================
# document_registry.py — Lightweight Registry for Uploaded Docs
# ==========================================================
class DocumentRegistry:
    """In-memory registry of processed documents, keyed by file name.

    Each entry is a plain dict with the keys ``name``, ``num_chunks``,
    ``toc_source``, ``chunks``, ``embeddings`` and ``index``. Nothing is
    persisted: the registry lives only as long as the instance does.
    """

    def __init__(self):
        # Maps document name (basename of the registered path) -> entry dict.
        self._registry = {}

    def register(self, file_path, chunks, embeddings, index, toc_source="unknown"):
        """Register a document, replacing any existing entry with the same name.

        Args:
            file_path (str): Path to the uploaded or sample PDF. Only its
                final path component is kept and used as the document ID, so
                two paths that share a file name map to the same entry.
            chunks (list): List of text chunks extracted from the document.
            embeddings (list): Corresponding vector embeddings.
            index (FAISS Index): Search index for this document.
            toc_source (str): How the Table of Contents was detected
                (heuristic/ai_inferred).

        Returns:
            str: The document ID (file name) the entry was stored under.
        """
        import os  # function-local so the class does not depend on module imports

        name = os.path.basename(file_path)
        num_chunks = len(chunks)

        # Store or replace the entry by file name. The entry keeps references
        # to the caller's objects; nothing is copied.
        self._registry[name] = {
            "name": name,
            "num_chunks": num_chunks,
            "toc_source": toc_source,
            "chunks": chunks,
            "embeddings": embeddings,
            "index": index,
        }

        print(f"📚 Registered {name} ({num_chunks} chunks)")
        return name  # the doc ID (file name)

    def list_docs(self):
        """Return a new list holding every registered entry.

        The elements are the full entry dicts themselves (including
        ``chunks``, ``embeddings`` and ``index``), not trimmed summaries or
        copies, so mutating an element also mutates the registry.
        """
        return list(self._registry.values())

    def get_doc(self, name):
        """Return the full entry stored under ``name``, or ``None`` if absent."""
        return self._registry.get(name)

    def clear(self):
        """Remove every entry from the registry."""
        self._registry.clear()