Alfred / retriever.py
wishmi1234's picture
Create retriever.py
f8c43a6 verified
raw
history blame contribute delete
837 Bytes
# Step 1: Load and Prepare the Dataset
# First, we need to transform our raw guest data into a format that’s optimized for retrieval.
# We will use the Hugging Face datasets library to load the dataset and convert it into a list of Document objects from the langchain_core.documents module.
import datasets
from langchain_core.documents import Document
# Load the "train" split of the invitee dataset via the Hugging Face
# `datasets` library.
guest_dataset = datasets.load_dataset(
    "agents-course/unit3-invitees",
    split="train",
)


def _guest_to_document(record):
    """Build a single Document from one guest record.

    The record is indexed by the string keys ``name``, ``relation``,
    ``description`` and ``email``. The page content is four
    newline-separated "Label: value" lines in that order; the metadata
    carries only the guest's name.
    """
    text = (
        f"Name: {record['name']}\n"
        f"Relation: {record['relation']}\n"
        f"Description: {record['description']}\n"
        f"Email: {record['email']}"
    )
    return Document(page_content=text, metadata={"name": record["name"]})


# One Document per dataset entry, in dataset order.
docs = [_guest_to_document(record) for record in guest_dataset]