Spaces:
Running
Running
| #manabUtils.py | |
| from langchain_community.vectorstores import FAISS | |
| from langchain_huggingface import HuggingFaceEmbeddings | |
| from huggingface_hub import hf_hub_download | |
| import os | |
def retrieve_chunks(
    repo_id: str,
    embedding_model: str = "sentence-transformers/all-MiniLM-L6-v2",
    k: int = 3,
):
    """Build a retriever over a FAISS index stored in a Hugging Face dataset repo.

    The repo is expected to hold ``index.faiss`` and ``index.pkl`` at its
    root. Both files are downloaded through the Hub cache and the vector
    store is loaded from the folder that contains ``index.faiss``.

    Args:
        repo_id: Hugging Face dataset repository id holding the index files.
        embedding_model: Name of the embedding model passed to
            ``HuggingFaceEmbeddings``.
        k: Value passed as ``search_kwargs["k"]`` to the retriever
            (defaults to 3, the previously hard-coded value).

    Returns:
        The retriever created by ``vectorstore.as_retriever``, or ``None``
        if any step fails (the error is printed to stdout).
    """
    try:
        # Step 1: the vector store needs an embeddings object, not a model name.
        embeddings = HuggingFaceEmbeddings(model_name=embedding_model)

        # Step 2: download both FAISS files from the HF Hub.
        faiss_path = hf_hub_download(
            repo_id=repo_id,
            filename="index.faiss",
            repo_type="dataset",
        )
        # index.pkl is fetched only for its side effect: it has to be present
        # on disk when the store is loaded. Its path is not needed because the
        # load below works from the folder of index.faiss.
        hf_hub_download(
            repo_id=repo_id,
            filename="index.pkl",
            repo_type="dataset",
        )

        # Step 3: load the FAISS vector store from the download folder.
        folder_path = os.path.dirname(faiss_path)
        # SECURITY NOTE: allow_dangerous_deserialization=True unpickles
        # index.pkl. Only point repo_id at repositories you trust.
        vectorstore = FAISS.load_local(
            folder_path=folder_path,
            embeddings=embeddings,
            allow_dangerous_deserialization=True,
        )

        # Step 4: expose the store as a top-k retriever.
        return vectorstore.as_retriever(search_kwargs={"k": k})
    except Exception as e:
        # Best-effort contract kept from the original: report and return None.
        print(f"Error in retrieve_chunks: {e}")
        return None
def retrieve_chunks_GPC(k: int = 3):
    """Build a retriever over the GPC FAISS index in the ``manabb/NRLGPC`` dataset repo.

    Downloads ``faiss_gpc_goods_merged/index.faiss`` and
    ``faiss_gpc_goods_merged/index.pkl`` through the Hub cache and loads the
    vector store from the folder that contains ``index.faiss``, using the
    ``sentence-transformers/all-MiniLM-L6-v2`` embedding model.

    Args:
        k: Value passed as ``search_kwargs["k"]`` to the retriever
            (defaults to 3, the previously hard-coded value).

    Returns:
        The retriever created by ``vectorstore.as_retriever``, or ``None``
        if any step fails (the error is printed to stdout).
    """
    embedding_model = "sentence-transformers/all-MiniLM-L6-v2"
    repo_id = "manabb/NRLGPC"
    index_dir = "faiss_gpc_goods_merged"
    try:
        # Step 1: the vector store needs an embeddings object, not a model name.
        embeddings = HuggingFaceEmbeddings(model_name=embedding_model)

        # Step 2: download both FAISS files from the HF Hub.
        faiss_path = hf_hub_download(
            repo_id=repo_id,
            filename=f"{index_dir}/index.faiss",
            repo_type="dataset",
        )
        # index.pkl is fetched only for its side effect: it has to be present
        # on disk when the store is loaded. Its path is not needed because the
        # load below works from the folder of index.faiss.
        hf_hub_download(
            repo_id=repo_id,
            filename=f"{index_dir}/index.pkl",
            repo_type="dataset",
        )

        # Step 3: load the FAISS vector store from the download folder.
        folder_path = os.path.dirname(faiss_path)
        # SECURITY NOTE: allow_dangerous_deserialization=True unpickles
        # index.pkl. This is acceptable only while the repository is trusted.
        vectorstore = FAISS.load_local(
            folder_path=folder_path,
            embeddings=embeddings,
            allow_dangerous_deserialization=True,
        )

        # Step 4: expose the store as a top-k retriever.
        return vectorstore.as_retriever(search_kwargs={"k": k})
    except Exception as e:
        # Best-effort contract kept from the original: report and return None.
        print(f"Error in retrieve_chunks_GPC: {e}")
        return None