Spaces:

menikev
/

KnowYourRIght-Bot

Sleeping

App Files Community

menikev committed on Aug 17, 2025

Commit

0811500

verified ·

1 Parent(s): 62d7114

Update src/retriever.py

Browse files

Files changed (1) hide show

src/retriever.py +4 -13

src/retriever.py CHANGED Viewed

@@ -1,7 +1,5 @@
 #!/usr/bin/env python3
-"""Retriever with robust, Spaces-friendly auto-build."""
-import os  # ← ADD THIS LINE
 import sys
 from pathlib import Path
 from langchain_community.embeddings import HuggingFaceEmbeddings
@@ -9,7 +7,6 @@ from langchain_chroma import Chroma
 def import_ingest(project_root: Path):
-    """Import src/ingest_documents.py even if src isn't a package."""
     import importlib.util
     ingest_path = project_root / "src" / "ingest_documents.py"
     if not ingest_path.exists():
@@ -31,13 +28,9 @@ def open_db(persist_dir: Path):
 def get_retriever():
-    # Fix: Get project_root for import_ingest function
     project_root = Path(__file__).resolve().parent
-    persist_dir = Path(os.getenv("VECTOR_DB_DIR", "/home/user/.cache/vector_db"))
-    embed = HuggingFaceEmbeddings(model_name="BAAI/bge-small-en", model_kwargs={"device": "cpu"})
-    # If DB missing or clearly empty → build
     needs_build = not persist_dir.exists() or not any(persist_dir.iterdir())
     if needs_build:
         print("⚡ vector_db missing/empty → running ingestion...")
@@ -46,12 +39,10 @@ def get_retriever():
             raise RuntimeError("Cannot import ingester. Ensure src/ingest_documents.py exists.")
         ingest_main()
-    # Open DB
     vectordb = open_db(persist_dir)
     count = vectordb._collection.count()
     print(f"Found {count} documents in collection 'legal_documents'")
-    # If still zero, try a forced rebuild once more (covers "wrong folder" cases)
     if count == 0:
         print("⚠️ Collection empty after first load — forcing rebuild…")
         ingest_main = import_ingest(project_root)
@@ -70,4 +61,4 @@ def get_retriever():
             " - Ingest produced zero chunks (empty content)"
         )
-    return vectordb.as_retriever(search_kwargs={"k": 5})

 #!/usr/bin/env python3
+import os
 import sys
 from pathlib import Path
 from langchain_community.embeddings import HuggingFaceEmbeddings
 def import_ingest(project_root: Path):
     import importlib.util
     ingest_path = project_root / "src" / "ingest_documents.py"
     if not ingest_path.exists():
 def get_retriever():
     project_root = Path(__file__).resolve().parent
+    persist_dir = Path(os.getenv("VECTOR_DB_DIR", "vector_db"))
     needs_build = not persist_dir.exists() or not any(persist_dir.iterdir())
     if needs_build:
         print("⚡ vector_db missing/empty → running ingestion...")
             raise RuntimeError("Cannot import ingester. Ensure src/ingest_documents.py exists.")
         ingest_main()
     vectordb = open_db(persist_dir)
     count = vectordb._collection.count()
     print(f"Found {count} documents in collection 'legal_documents'")
     if count == 0:
         print("⚠️ Collection empty after first load — forcing rebuild…")
         ingest_main = import_ingest(project_root)
             " - Ingest produced zero chunks (empty content)"
         )
+    return vectordb.as_retriever(search_kwargs={"k": 3})