dlaima committed on
Commit
bb4dc1a
·
verified ·
1 Parent(s): 455e971

Create retriever.py

Browse files
Files changed (1) hide show
  1. retriever.py +84 -0
retriever.py ADDED
@@ -0,0 +1,84 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from smolagents import Tool
2
+ from langchain_community.retrievers import BM25Retriever
3
+ from langchain.docstore.document import Document
4
+ import datasets
5
+
6
+
7
class GuestInfoRetrieverTool(Tool):
    """Tool that retrieves gala guest records and suggests an icebreaker.

    Guest documents are indexed with ``BM25Retriever``. Each record returned
    by ``forward`` is followed by a conversation starter derived from the
    record's ``Description:`` line.
    """

    name = "guest_info_retriever"
    description = "Retrieves detailed information about gala guests based on their name or relation."
    inputs = {
        "query": {
            "type": "string",
            "description": "The name or relation of the guest you want information about."
        }
    }
    output_type = "string"

    def __init__(self, docs):
        """Build the BM25 index over ``docs``.

        Args:
            docs: Documents passed to ``BM25Retriever.from_documents``.
        """
        # Let the base class run its own setup before adding our state.
        super().__init__()
        # The original set this flag by hand; kept so the attribute exists
        # regardless of what the base initializer does.
        self.is_initialized = False
        self.retriever = BM25Retriever.from_documents(docs)

    def _generate_conversation_starter(self, doc: Document):
        """Return an icebreaker sentence for the guest described by ``doc``.

        The text after the ``Description:`` prefix is scanned for a fixed list
        of interest keywords. The first keyword (in list order) that begins
        some word of the description is used; if none matches, a generic
        suggestion is returned.

        Args:
            doc: Document whose ``page_content`` holds ``Label: value`` lines.

        Returns:
            A single-sentence conversation starter.
        """
        description = ""
        for line in doc.page_content.splitlines():
            if line.startswith("Description:"):
                # Slice off only the leading label; ``str.replace`` would also
                # delete any later "Description:" inside the value itself.
                description = line[len("Description:"):].strip()

        # Split into lowercase words, treating every non-alphanumeric
        # character as a separator, so keywords are compared against words
        # rather than arbitrary substrings.
        words = "".join(
            ch if ch.isalnum() else " " for ch in description.lower()
        ).split()

        # Example heuristic: use keywords from description.
        # You could expand this with keyword extraction or simple NLP parsing.
        # A keyword must start a word, so "party" or "smart" no longer count
        # as "art", while "artist" or "musician" still match.
        keywords = ("art", "science", "sports", "music", "history", "technology", "travel", "literature")
        interest = next(
            (kw for kw in keywords if any(word.startswith(kw) for word in words)),
            None,
        )

        if interest is not None:
            return f"A good icebreaker could be: 'I heard you're into {interest}. What's your favorite part about it?'"
        return "Try asking about their background—it sounds fascinating!"

    def forward(self, query: str):
        """Look up guests matching ``query`` and format the top results.

        Args:
            query: Name or relation of the guest to search for.

        Returns:
            The text of up to three retrieved documents, each followed by a
            conversation starter and separated by ``---`` lines, or a fixed
            "not found" message when the retriever returns nothing.
        """
        # ``invoke`` is the supported retriever entry point; the older
        # ``get_relevant_documents`` is deprecated in LangChain.
        results = self.retriever.invoke(query)
        if not results:
            return "No matching guest information found."

        return "\n\n---\n\n".join(
            f"{doc.page_content}\n\n{self._generate_conversation_starter(doc)}"
            for doc in results[:3]
        )
64
+
65
def load_guest_dataset():
    """Create a ``GuestInfoRetrieverTool`` from the invitee dataset.

    Loads the ``train`` split of ``agents-course/unit3-invitees`` with
    ``datasets.load_dataset``, turns every row into a ``Document`` whose text
    lists the guest's name, relation, description and email on separate
    labelled lines, and hands the documents to the tool.

    Returns:
        A ``GuestInfoRetrieverTool`` built over all rows of the split.
    """
    invitees = datasets.load_dataset("agents-course/unit3-invitees", split="train")

    guest_documents = []
    for invitee in invitees:
        # One "Label: value" pair per line, in a fixed order.
        profile_text = (
            f"Name: {invitee['name']}\n"
            f"Relation: {invitee['relation']}\n"
            f"Description: {invitee['description']}\n"
            f"Email: {invitee['email']}"
        )
        guest_documents.append(
            Document(page_content=profile_text, metadata={"name": invitee["name"]})
        )

    return GuestInfoRetrieverTool(guest_documents)