GenAI_Career_Agent / rag /retieval.py
Raheel Abdul Rehman
Updates
ae03d68
import os
import sys
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceEmbeddings
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
from rag.logger import get_logger # pylint: disable=import-error
logger = get_logger(__name__)
def load_vectorstore():
    """Load the persisted FAISS vector store from disk.

    The store is read from ``data/vectorstores`` under the directory that
    is one level above the directory containing this file.

    Returns:
        The FAISS vector store, loaded with the
        ``sentence-transformers/all-MiniLM-L6-v2`` embeddings.

    Raises:
        FileNotFoundError: If the vector store path does not exist.
    """
    base_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
    vectorstore_path = os.path.join(base_dir, "data", "vectorstores")

    # Fail fast: verify the index exists *before* constructing the embedding
    # model, so a missing index is reported without first paying the cost of
    # instantiating the model.
    if not os.path.exists(vectorstore_path):
        raise FileNotFoundError(
            f"Vectorstore not found at: {vectorstore_path}\nRun ingest.py first."
        )

    embeddings = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2"
    )

    # SECURITY: allow_dangerous_deserialization=True permits pickle-based
    # loading, which can execute arbitrary code if the files on disk were
    # tampered with. Acceptable only while the index is produced locally
    # (presumably by ingest.py -- confirm); never point this at an
    # untrusted path.
    vectorstore = FAISS.load_local(
        vectorstore_path,
        embeddings,
        allow_dangerous_deserialization=True
    )
    logger.info('Vector store loaded')
    return vectorstore
def get_retriever(top_k: int = 5):
    """Build a LangChain retriever backed by the persisted vector store.

    Args:
        top_k: Value passed to the retriever as the ``k`` search kwarg
            (number of results requested per query).

    Returns:
        The retriever produced by ``vectorstore.as_retriever``.

    Raises:
        FileNotFoundError: Propagated from ``load_vectorstore`` when the
            vector store path does not exist.
    """
    vectorstore = load_vectorstore()
    # LangChain wrapper - preferred
    retriever = vectorstore.as_retriever(
        search_kwargs={"k": top_k}
    )
    # Only the retriever object has been constructed here; no query has run
    # yet, so the log message must not claim that retrieval is complete.
    logger.info('Retriever created')
    return retriever
def search(query: str, top_k: int = 5):
    """Run a similarity search for ``query`` against the vector store.

    Args:
        query: Text to search for.
        top_k: Passed as ``k`` to ``similarity_search``.

    Returns:
        Whatever ``similarity_search`` returns for the query (the caller
        below reads ``page_content`` and ``metadata`` from each item).
    """
    store = load_vectorstore()
    return store.similarity_search(query, k=top_k)
if __name__ == "__main__":
    # Manual smoke test: run one fixed query and dump every returned chunk.
    demo_query = "GITHUB REPO DATA"
    matches = search(demo_query, top_k=5)

    print("\n=== SIMILARITY RESULTS ===")
    for match in matches:
        # One print call per chunk header + body; output is identical to
        # printing the header and the content on separate calls.
        print("\n--- CHUNK ---", match.page_content, sep="\n")
        print("Metadata:", match.metadata)