# manabUtils.py
"""Helpers that build LangChain retrievers from FAISS indexes stored in
Hugging Face *dataset* repositories."""

import os

from huggingface_hub import hf_hub_download
from langchain_community.vectorstores import FAISS
from langchain_huggingface import HuggingFaceEmbeddings

_DEFAULT_EMBEDDING_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
_GPC_REPO_ID = "manabb/NRLGPC"
_GPC_INDEX_SUBFOLDER = "faiss_gpc_goods_merged"
# Value passed as search_kwargs["k"] to every retriever built here.
_RETRIEVER_K = 3


def _build_retriever(repo_id, embedding_model, subfolder=""):
    """Download a FAISS index from a HF dataset repo and wrap it in a retriever.

    Args:
        repo_id: Hugging Face dataset repository holding the index files.
        embedding_model: Model name handed to ``HuggingFaceEmbeddings``.
        subfolder: Directory inside the repo that contains ``index.faiss``
            and ``index.pkl``; empty string means the repo root.

    Returns:
        The object returned by ``vectorstore.as_retriever`` with
        ``search_kwargs={"k": 3}``.

    Raises:
        Whatever the embedding constructor, ``hf_hub_download`` or
        ``FAISS.load_local`` raise; this helper does not catch anything.
    """
    embeddings = HuggingFaceEmbeddings(model_name=embedding_model)

    prefix = f"{subfolder}/" if subfolder else ""
    faiss_path = hf_hub_download(
        repo_id=repo_id,
        filename=f"{prefix}index.faiss",
        repo_type="dataset",
    )
    # index.pkl is fetched only so that it is present on disk; load_local is
    # given the directory of index.faiss and is expected to find the pickle
    # next to it (assumes both downloads resolve to the same cache folder).
    hf_hub_download(
        repo_id=repo_id,
        filename=f"{prefix}index.pkl",
        repo_type="dataset",
    )

    # SECURITY: allow_dangerous_deserialization=True means index.pkl is
    # unpickled. Only point this at repositories whose contents you trust.
    vectorstore = FAISS.load_local(
        folder_path=os.path.dirname(faiss_path),
        embeddings=embeddings,
        allow_dangerous_deserialization=True,
    )
    return vectorstore.as_retriever(search_kwargs={"k": _RETRIEVER_K})


def retrieve_chunks(repo_id, embedding_model=_DEFAULT_EMBEDDING_MODEL):
    """Retrieve chunks from the FAISS index at the root of a HF dataset repo.

    Args:
        repo_id: Hugging Face dataset repository containing ``index.faiss``
            and ``index.pkl`` at its root.
        embedding_model: Model name used to build the embeddings object.

    Returns:
        A retriever over the loaded vector store, or ``None`` if any step
        failed (the error is printed).
    """
    try:
        return _build_retriever(repo_id, embedding_model)
    except Exception as e:
        # Deliberately broad: callers rely on getting None instead of an
        # exception when the download or load fails.
        print(f"Error in retrieve_chunks: {e}")
        return None


def retrieve_chunks_GPC():
    """Retrieve chunks from the HF dataset FAISS index for GPC.

    Uses the ``manabb/NRLGPC`` dataset repo and the index stored under
    ``faiss_gpc_goods_merged/``.

    Returns:
        A retriever over the loaded vector store, or ``None`` if any step
        failed (the error is printed).
    """
    try:
        return _build_retriever(
            _GPC_REPO_ID,
            _DEFAULT_EMBEDDING_MODEL,
            subfolder=_GPC_INDEX_SUBFOLDER,
        )
    except Exception as e:
        # Deliberately broad: callers rely on getting None instead of an
        # exception when the download or load fails.
        print(f"Error in retrieve_chunks_GPC: {e}")
        return None