PARTHA181098 committed on
Commit
eb3816e
·
verified ·
1 Parent(s): 10ac869

Create rag.py

Browse files
Files changed (1) hide show
  1. rag.py +48 -0
rag.py ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ # from langchain.schema import Document
3
+ from langchain_core.documents import Document
4
+ # from langchain.embeddings import HuggingFaceEmbeddings
5
+ from langchain_huggingface import HuggingFaceEmbeddings
6
+ # from langchain.vectorstores import FAISS
7
+ from langchain_community.vectorstores import FAISS
8
+
9
+
10
def _student_text(record):
    """Render one CSV row as the sentence-style text used as page content."""
    return (
        f"Student record. "
        f"Register number {record['Register Number']}. "
        f"Name {record['Name']}. "
        f"Email {record['EmailID']}. "
        f"Department {record['Department']}. "
        f"Year {record['Year of Study']}. "
        f"Hobby {record['Hobby']}. "
        f"Study level {record['Study Level']}. "
        f"Sleeptime {record['Sleeptime']}."
    ).strip()


def load_student_documents(csv_path: str):
    """Read the CSV at ``csv_path`` and build one ``Document`` per row.

    Each row is flattened into a single descriptive sentence sequence that
    becomes the document's ``page_content``; the row's ``Register Number``
    value is stored in the metadata under ``register_number``.

    Args:
        csv_path: Path handed to ``pandas.read_csv``. The file must provide
            the columns ``Register Number``, ``Name``, ``EmailID``,
            ``Department``, ``Year of Study``, ``Hobby``, ``Study Level``
            and ``Sleeptime``.

    Returns:
        A list of ``Document`` objects in the same order as the CSV rows.

    Raises:
        KeyError: If a row lacks one of the columns listed above.
    """
    frame = pd.read_csv(csv_path)
    return [
        Document(
            page_content=_student_text(record),
            metadata={"register_number": record["Register Number"]},
        )
        for _, record in frame.iterrows()
    ]
33
+
34
def create_vectorstore(
    documents,
    *,
    model_name: str = "sentence-transformers/all-MiniLM-L6-v2",
):
    """Embed ``documents`` and index them in a FAISS vector store.

    Args:
        documents: The documents to index, passed unchanged to
            ``FAISS.from_documents``.
        model_name: Model identifier given to ``HuggingFaceEmbeddings``.
            Keyword-only; the default is the model this function previously
            hard-coded, so existing callers get the same embeddings.

    Returns:
        The vector store returned by ``FAISS.from_documents``.
    """
    embeddings = HuggingFaceEmbeddings(model_name=model_name)
    return FAISS.from_documents(documents=documents, embedding=embeddings)
43
+
44
def get_retriever(vectorstore, *, k: int = 6, lambda_mult: float = 0.7):
    """Return a retriever for ``vectorstore`` that searches with MMR.

    Args:
        vectorstore: Object exposing
            ``as_retriever(search_type=..., search_kwargs=...)``.
        k: Value forwarded as ``search_kwargs["k"]`` (the number of
            documents requested per query). Keyword-only; defaults to 6.
        lambda_mult: Value forwarded as ``search_kwargs["lambda_mult"]``.
            Per the LangChain retriever docs this is the MMR diversity
            setting (1 = minimum diversity, 0 = maximum). Keyword-only;
            defaults to 0.7.

    Returns:
        Whatever ``vectorstore.as_retriever`` returns.
    """
    return vectorstore.as_retriever(
        search_type="mmr",
        search_kwargs={"k": k, "lambda_mult": lambda_mult},
    )