wishmi1234 committed on
Commit
f8c43a6
·
verified ·
1 Parent(s): 5132b6b

Create retriever.py

Browse files

Creating retriever.py

Files changed (1) hide show
  1. retriever.py +24 -0
retriever.py ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# Step 1: Load and prepare the dataset.
# The raw guest data is transformed into a format that is optimized for
# retrieval: the Hugging Face `datasets` library loads the dataset, and each
# entry is converted into a `Document` object from `langchain_core.documents`.

import datasets
from langchain_core.documents import Document

# Load the "train" split of the invitees dataset.
guest_dataset = datasets.load_dataset("agents-course/unit3-invitees", split="train")

# Convert dataset entries into Document objects. Each guest's fields are
# flattened into one newline-separated text block (the searchable content),
# while the guest's name is also kept as metadata on the Document.
docs = [
    Document(
        page_content="\n".join([
            f"Name: {guest['name']}",
            f"Relation: {guest['relation']}",
            f"Description: {guest['description']}",
            f"Email: {guest['email']}",
        ]),
        metadata={"name": guest["name"]},
    )
    for guest in guest_dataset
]