"""Load the persisted FAISS vector store and expose retrieval helpers.

The store is read from ``<parent of this file's directory>/data/vectorstores``.
Running this module as a script performs one sample similarity search and
prints the matching chunks.
"""
import os
import sys
from functools import lru_cache

from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceEmbeddings

# Put the parent of this file's directory on sys.path so that the ``rag``
# package import below resolves. This must run before that import.
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))

from rag.logger import get_logger  # pylint: disable=import-error

logger = get_logger(__name__)

# Embedding model used to embed queries when loading/searching the index.
# NOTE(review): presumably this must match the model used by ingest.py when
# the index was built -- confirm against the ingestion script.
EMBEDDING_MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"


@lru_cache(maxsize=1)
def _load_embeddings():
    """Return the shared embeddings object, constructing it on first use.

    The model name is a fixed constant, so the object is invariant across
    calls; caching it avoids rebuilding the model for every search or
    retriever request.
    """
    return HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL_NAME)


def load_vectorstore():
    """Load the FAISS vector store from ``data/vectorstores``.

    The index is re-read from disk on every call (only the embeddings object
    is cached), so a freshly re-ingested store is picked up by the next call.

    Returns:
        The ``FAISS`` vector store returned by ``FAISS.load_local``.

    Raises:
        FileNotFoundError: If the vector store path does not exist.
    """
    base_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
    vectorstore_path = os.path.join(base_dir, "data", "vectorstores")

    if not os.path.exists(vectorstore_path):
        raise FileNotFoundError(
            f"Vectorstore not found at: {vectorstore_path}\nRun ingest.py first."
        )

    # SECURITY: allow_dangerous_deserialization=True permits pickle-based
    # loading, which can execute arbitrary code. Only acceptable because the
    # path is a fixed local directory; never point this at untrusted files.
    vectorstore = FAISS.load_local(
        vectorstore_path,
        _load_embeddings(),
        allow_dangerous_deserialization=True,
    )
    logger.info('Vector store loaded')
    return vectorstore


def get_retriever(top_k: int = 5):
    """Build a retriever over the vector store.

    Args:
        top_k: Value passed as ``k`` in the retriever's ``search_kwargs``.

    Returns:
        The object returned by ``vectorstore.as_retriever``.

    Raises:
        FileNotFoundError: If the vector store path does not exist.
    """
    vectorstore = load_vectorstore()

    # LangChain wrapper - preferred
    retriever = vectorstore.as_retriever(
        search_kwargs={"k": top_k}
    )
    # Nothing has been retrieved at this point; only the retriever exists.
    logger.info('Retriever created')
    return retriever


def search(query: str, top_k: int = 5):
    """Run a similarity search against the vector store.

    Args:
        query: Text to search for.
        top_k: Value passed as ``k`` to ``similarity_search``.

    Returns:
        The result of ``vectorstore.similarity_search``; the script entry
        point below reads ``page_content`` and ``metadata`` from each item.

    Raises:
        FileNotFoundError: If the vector store path does not exist.
    """
    vectorstore = load_vectorstore()
    return vectorstore.similarity_search(query, k=top_k)


def main():
    """Run a sample query and print each returned chunk with its metadata."""
    query = "GITHUB REPO DATA"
    results = search(query, top_k=5)

    print("\n=== SIMILARITY RESULTS ===")
    for r in results:
        print("\n--- CHUNK ---")
        print(r.page_content)
        print("Metadata:", r.metadata)


if __name__ == "__main__":
    main()