File size: 1,784 Bytes
0907913
d5790e2
0907913
 
 
 
d5790e2
 
0907913
d5790e2
 
 
0907913
d5790e2
 
 
 
 
 
 
 
 
 
 
 
 
0907913
 
d5790e2
0907913
 
d5790e2
 
 
 
0907913
 
d5790e2
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
# ==========================================================
# document_registry.py — Lightweight Registry for Uploaded Docs
# ==========================================================

class DocumentRegistry:
    """In-memory registry of processed documents, keyed by file basename.

    Each entry is a plain dict with the keys ``name``, ``num_chunks``,
    ``toc_source``, ``chunks``, ``embeddings`` and ``index``. Nothing is
    persisted; the registry lives only as long as the instance does.
    """

    def __init__(self) -> None:
        # Maps document name (file basename) -> entry dict.
        self._registry = {}

    def register(self, file_path: str, chunks, embeddings, index,
                 toc_source: str = "unknown") -> str:
        """Register a document, replacing any entry with the same basename.

        Args:
            file_path (str): Path to the uploaded or sample PDF. Only its
                basename is kept and used as the registry key.
            chunks (list): Text chunks extracted from the document.
            embeddings (list): Vector embeddings corresponding to ``chunks``.
            index (FAISS Index): Search index for this document.
            toc_source (str): How the Table of Contents was detected
                (heuristic/ai_inferred).

        Returns:
            str: The document ID, i.e. the basename of ``file_path``.
        """
        import os

        # The key is the basename only, so two files with the same name in
        # different directories share one slot and the later one wins.
        name = os.path.basename(file_path)
        num_chunks = len(chunks)

        # Store or replace the entry by filename. The chunk, embedding and
        # index objects are stored by reference, not copied.
        self._registry[name] = {
            "name": name,
            "num_chunks": num_chunks,
            "toc_source": toc_source,
            "chunks": chunks,
            "embeddings": embeddings,
            "index": index,
        }
        print(f"📚 Registered {name} ({num_chunks} chunks)")
        return name

    def list_docs(self) -> list:
        """Return a new list holding every registered entry.

        The entries are the full stored dicts (including ``chunks``,
        ``embeddings`` and ``index``), not reduced summaries, and they are
        the same objects the registry holds rather than copies.
        """
        return list(self._registry.values())

    def get_doc(self, name):
        """Return the full entry registered under ``name``, or ``None``.

        ``name`` is the document ID returned by :meth:`register`.
        """
        return self._registry.get(name)

    def clear(self) -> None:
        """Remove every entry from the registry."""
        self._registry.clear()