Create document_registry.py
src/document_registry.py (added, +39 −0)
# ==========================================================
# document_registry.py — lightweight per-session library
# ==========================================================
import hashlib
import os
import time


class DocumentRegistry:
    def __init__(self):
        self.docs = {}

    def _hash_file(self, file_path):
        """Compute a short content hash to detect truly identical files."""
        hasher = hashlib.sha256()
        with open(file_path, "rb") as f:
            while chunk := f.read(8192):
                hasher.update(chunk)
        return hasher.hexdigest()[:16]

    def register(self, file_path, chunks, embeddings, index):
        """Store one processed document."""
        file_name = os.path.basename(file_path)
        content_hash = self._hash_file(file_path)
        doc_id = f"{file_name}_{content_hash}"
        self.docs[doc_id] = {
            "doc_name": file_name,
            "hash": content_hash,
            "path": file_path,
            "chunks": chunks,
            "embeddings": embeddings,
            "index": index,
            "timestamp": time.strftime("%Y-%m-%d %H:%M:%S"),
        }
        print(f"📚 Registered {file_name} ({len(chunks)} chunks)")
        return doc_id

    def get(self, doc_id):
        return self.docs.get(doc_id)

    def list_docs(self):
        return list(self.docs.values())
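
A minimal usage sketch of the class above. The chunks, embeddings, and index values here are hypothetical stand-ins for whatever the surrounding pipeline actually produces (the registry itself does not care about their types):

    # Usage sketch — chunk/embedding/index values are placeholders,
    # not the output of any real processing step.
    import pathlib

    from document_registry import DocumentRegistry

    # Create a small file so register() has something to hash.
    pathlib.Path("notes.txt").write_text("example document contents")

    registry = DocumentRegistry()

    chunks = ["first chunk of text", "second chunk of text"]
    embeddings = [[0.1, 0.2], [0.3, 0.4]]  # placeholder vectors
    index = None                           # placeholder for e.g. a vector index

    doc_id = registry.register("notes.txt", chunks, embeddings, index)

    doc = registry.get(doc_id)
    print(doc["doc_name"], doc["timestamp"])

    for entry in registry.list_docs():
        print(entry["doc_name"], len(entry["chunks"]))

Note the doc_id combines the file name with the content hash, so re-registering the same file under the same name overwrites its entry, while identical bytes uploaded under two different names register as two separate documents.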