manabb committed on
Commit
d216b1f
·
verified ·
1 Parent(s): 219dcac

Update manabUtils.py

Browse files
Files changed (1) hide show
  1. manabUtils.py +40 -1
manabUtils.py CHANGED
@@ -1 +1,40 @@
1
- #to be updated
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #to be updated
2
+ from langchain_community.vectorstores import FAISS
3
+ from langchain_huggingface import HuggingFaceEmbeddings
4
+ from huggingface_hub import hf_hub_download
5
+ import os
6
+
7
def retrieve_chunks(repo_id, embedding_model="sentence-transformers/all-MiniLM-L6-v2", k=3):
    """Build a retriever over a FAISS index stored in a Hugging Face dataset repo.

    Downloads ``index.faiss`` and ``index.pkl`` from the dataset repo, loads
    them as a FAISS vector store, and wraps that store in a retriever.

    Args:
        repo_id: Identifier of the Hugging Face *dataset* repo that holds
            ``index.faiss`` and ``index.pkl``.
        embedding_model: Model name passed to ``HuggingFaceEmbeddings``. It
            presumably has to match the model the index was built with;
            verify against the code that created the index.
        k: Value passed as ``search_kwargs["k"]`` to the retriever
            (defaults to 3, the value that was previously hard-coded).

    Returns:
        The retriever returned by ``vectorstore.as_retriever``, or ``None``
        if ``repo_id`` is empty or any step fails. Failures are reported on
        stdout rather than raised.
    """
    # Fail fast on an empty repo id instead of instantiating the embedding
    # model only to have the download reject the id afterwards.
    if not repo_id:
        print("Error in retrieve_chunks: repo_id must be a non-empty string")
        return None

    try:
        # Step 1: the vector store needs an embeddings object, not the
        # model name string.
        embeddings = HuggingFaceEmbeddings(model_name=embedding_model)

        # Step 2: download both FAISS files from the Hub.
        faiss_path = hf_hub_download(
            repo_id=repo_id,
            filename="index.faiss",
            repo_type="dataset",
        )
        # index.pkl is fetched only for its side effect: load_local is given
        # the folder of index.faiss, so the returned path itself is unused.
        hf_hub_download(
            repo_id=repo_id,
            filename="index.pkl",
            repo_type="dataset",
        )

        # Step 3: load the vector store from the folder holding the files.
        # SECURITY NOTE(review): allow_dangerous_deserialization=True lets
        # load_local deserialize index.pkl, which can execute arbitrary code.
        # Only call this with repos whose contents you trust.
        folder_path = os.path.dirname(faiss_path)
        vectorstore = FAISS.load_local(
            folder_path=folder_path,
            embeddings=embeddings,
            allow_dangerous_deserialization=True,
        )

        # Step 4: expose the store as a retriever.
        retriever = vectorstore.as_retriever(search_kwargs={"k": k})
    except Exception as e:
        # Deliberate best-effort boundary: callers rely on None on failure.
        print(f"Error in retrieve_chunks: {e}")
        return None
    return retriever