manabb committed on
Commit
6def0f7
·
verified ·
1 Parent(s): 448e54b

Update manabUtils.py

Browse files
Files changed (1) hide show
  1. manabUtils.py +39 -1
manabUtils.py CHANGED
@@ -1,4 +1,4 @@
1
- #to be updated
2
  from langchain_community.vectorstores import FAISS
3
  from langchain_huggingface import HuggingFaceEmbeddings
4
  from huggingface_hub import hf_hub_download
@@ -32,6 +32,44 @@ def retrieve_chunks(repo_id, embedding_model="sentence-transformers/all-MiniLM-L
32
  allow_dangerous_deserialization=True
33
  )
34
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
35
  # Step 4: Create retriever
36
  retriever = vectorstore.as_retriever(search_kwargs={"k": 3})
37
  except Exception as e:
 
1
+ #manabUtils.py
2
  from langchain_community.vectorstores import FAISS
3
  from langchain_huggingface import HuggingFaceEmbeddings
4
  from huggingface_hub import hf_hub_download
 
32
  allow_dangerous_deserialization=True
33
  )
34
 
35
+ # Step 4: Create retriever
36
+ retriever = vectorstore.as_retriever(search_kwargs={"k": 3})
37
+ except Exception as e:
38
+ print(f"Error in generate_qa_chain: {e}")
39
+ return None
40
+ return retriever
41
+
42
+ def retrieve_chunks_GPC():
43
+ """
44
+ Retrieve chunks from HF dataset for GPC
45
+ """
46
+ embedding_model="sentence-transformers/all-MiniLM-L6-v2"
47
+ repo_id="manabb/NRLGPC"
48
+
49
+ try:
50
+ # Step 1: Create embeddings (FIX: was missing)
51
+ embeddings = HuggingFaceEmbeddings(model_name=embedding_model)
52
+
53
+ # Step 2: Download FAISS files from HF Hub
54
+ faiss_path = hf_hub_download(
55
+ repo_id=repo_id,
56
+ filename="faiss_gpc_goods_merged/index.faiss",
57
+ repo_type="dataset"
58
+ )
59
+ pkl_path = hf_hub_download(
60
+ repo_id=repo_id,
61
+ filename="faiss_gpc_goods_merged/index.pkl",
62
+ repo_type="dataset"
63
+ )
64
+
65
+ # Step 3: Load FAISS vectorstore (FIX: pass embeddings object, not string)
66
+ folder_path = os.path.dirname(faiss_path)
67
+ vectorstore = FAISS.load_local(
68
+ folder_path=folder_path,
69
+ embeddings=embeddings, # FIXED: was 'embedding_model' string
70
+ allow_dangerous_deserialization=True
71
+ )
72
+
73
  # Step 4: Create retriever
74
  retriever = vectorstore.as_retriever(search_kwargs={"k": 3})
75
  except Exception as e: