# ==========================================================
# document_registry.py — Lightweight Registry for Uploaded Docs
# ==========================================================

import os
from typing import Any, Dict, List, Optional, Sequence


class DocumentRegistry:
    """In-memory registry of processed documents, keyed by file name.

    Each entry is a plain dict with the keys ``name``, ``num_chunks``,
    ``toc_source``, ``chunks``, ``embeddings`` and ``index``.  Entries are
    keyed by the base name of the registered path, so registering two
    different paths that share a file name replaces the earlier entry.
    """

    def __init__(self) -> None:
        # Maps document name (base name of the registered path) -> entry dict.
        self._registry: Dict[str, Dict[str, Any]] = {}

    def register(
        self,
        file_path: str,
        chunks: Sequence[Any],
        embeddings: Any,
        index: Any,
        toc_source: str = "unknown",
    ) -> str:
        """Register a document, replacing any entry with the same file name.

        Args:
            file_path: Path to the uploaded or sample PDF. Only its base
                name is kept and used as the registry key.
            chunks: Sized collection of text chunks extracted from the
                document; its length is recorded as ``num_chunks``.
            embeddings: Vector embeddings corresponding to ``chunks``.
                Stored as-is; not inspected here.
            index: Search index object for this document (described by the
                original notes as a FAISS index). Stored as-is.
            toc_source: How the Table of Contents was detected
                (e.g. ``"heuristic"`` or ``"ai_inferred"``).

        Returns:
            The document ID, i.e. the base name of ``file_path``.

        Raises:
            TypeError: If ``chunks`` does not support ``len()``.
        """
        name = os.path.basename(file_path)
        num_chunks = len(chunks)

        # Store or replace the entry under the file name. The registry holds
        # references to the caller's objects; nothing is copied.
        self._registry[name] = {
            "name": name,
            "num_chunks": num_chunks,
            "toc_source": toc_source,
            "chunks": chunks,
            "embeddings": embeddings,
            "index": index,
        }

        print(f"📚 Registered {name} ({num_chunks} chunks)")
        return name

    def list_docs(self) -> List[Dict[str, Any]]:
        """Return all registered entries as a new list.

        Every element is the full entry dict (including ``chunks``,
        ``embeddings`` and ``index``), not a trimmed summary. The list is
        new, but the dicts are the registry's own objects.
        """
        return list(self._registry.values())

    def get_doc(self, name: str) -> Optional[Dict[str, Any]]:
        """Return the full entry registered under ``name``, or ``None``.

        ``name`` is the document ID returned by :meth:`register`.
        """
        return self._registry.get(name)

    def clear(self) -> None:
        """Remove every entry from the registry."""
        self._registry.clear()