Spaces:
Runtime error
Runtime error
| import os | |
| import sys | |
| from langchain_community.vectorstores import FAISS | |
| from langchain_community.embeddings import HuggingFaceEmbeddings | |
| sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))) | |
| from rag.logger import get_logger # pylint: disable=import-error | |
| logger = get_logger(__name__) | |
def load_vectorstore():
    """Load the persisted FAISS vector store from disk.

    The store is read from ``data/vectorstores`` under the parent of the
    directory that contains this file.

    Returns:
        The FAISS vector store, loaded with the
        ``sentence-transformers/all-MiniLM-L6-v2`` embedding model.

    Raises:
        FileNotFoundError: If the vector store directory does not exist
            (or the path is not a directory).
    """
    base_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
    vectorstore_path = os.path.join(base_dir, "data", "vectorstores")

    # Validate the path before building the embedding model so a missing
    # index fails immediately instead of after the model has been loaded.
    if not os.path.isdir(vectorstore_path):
        raise FileNotFoundError(
            f"Vectorstore not found at: {vectorstore_path}\nRun ingest.py first."
        )

    embeddings = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2"
    )

    # SECURITY: allow_dangerous_deserialization=True opts in to loading
    # pickled data stored alongside the index. Only load index files that
    # this project produced itself (presumably via ingest.py); never point
    # this at files from an untrusted source.
    vectorstore = FAISS.load_local(
        vectorstore_path,
        embeddings,
        allow_dangerous_deserialization=True,
    )
    logger.info('Vector store loaded')
    return vectorstore
def get_retriever(top_k: int = 5):
    """Build a LangChain retriever backed by the persisted vector store.

    Args:
        top_k: Number of documents the retriever should return per query.
            Must be at least 1.

    Returns:
        The retriever produced by ``vectorstore.as_retriever`` with
        ``search_kwargs={"k": top_k}``.

    Raises:
        ValueError: If ``top_k`` is less than 1.
        FileNotFoundError: Propagated from ``load_vectorstore`` when the
            vector store is missing.
    """
    # Reject an unusable k before paying for the model/index load.
    if top_k < 1:
        raise ValueError(f"top_k must be a positive integer, got {top_k!r}")

    vectorstore = load_vectorstore()
    # LangChain wrapper - preferred
    retriever = vectorstore.as_retriever(search_kwargs={"k": top_k})
    # Nothing has been retrieved at this point; only the retriever exists.
    logger.info('Retriever created (top_k=%s)', top_k)
    return retriever
def search(query: str, top_k: int = 5):
    """Run a similarity search against the persisted vector store.

    Note that the vector store is loaded afresh on every call.

    Args:
        query: Text to search for.
        top_k: Maximum number of results to return. Must be at least 1.

    Returns:
        The result of ``vectorstore.similarity_search(query, k=top_k)``.

    Raises:
        ValueError: If ``top_k`` is less than 1.
        FileNotFoundError: Propagated from ``load_vectorstore`` when the
            vector store is missing.
    """
    # Reject an unusable k before paying for the model/index load.
    if top_k < 1:
        raise ValueError(f"top_k must be a positive integer, got {top_k!r}")

    vectorstore = load_vectorstore()
    return vectorstore.similarity_search(query, k=top_k)
if __name__ == "__main__":
    # Manual smoke test: run one fixed query and dump every returned chunk
    # together with its metadata.
    hits = search("GITHUB REPO DATA", top_k=5)

    print("\n=== SIMILARITY RESULTS ===")
    for doc in hits:
        print("\n--- CHUNK ---", doc.page_content, sep="\n")
        print("Metadata:", doc.metadata)