import datasets
from langchain.docstore.document import Document
from langchain.tools import Tool
from langchain_community.retrievers import BM25Retriever

# Upper bound on how many retrieved guest records are returned per query.
_MAX_RESULTS = 3

# Message returned when a query yields nothing usable.
_NO_MATCH_MESSAGE = "No matching guest information found."

# Load the guest dataset (the "train" split of the invitee list).
guest_dataset = datasets.load_dataset(
    "agents-course/unit3-invitees", split="train")

# Convert each guest record into a Document whose text lists the guest's
# name, relation, description and email on separate lines. The guest's name
# is also stored in the metadata.
docs = [
    Document(
        page_content="\n".join([
            f"Name: {guest['name']}",
            f"Relation: {guest['relation']}",
            f"Description: {guest['description']}",
            f"Email: {guest['email']}",
        ]),
        metadata={"name": guest["name"]},
    )
    for guest in guest_dataset
]

# Retriever built over all guest documents.
bm25_retriever = BM25Retriever.from_documents(docs)


def extract_text(query: str) -> str:
    """Retrieve detailed information about gala guests by name or relation.

    Args:
        query: Free-text search string, e.g. a guest's name or their
            relation to the host.

    Returns:
        The text of up to ``_MAX_RESULTS`` retrieved guest records, separated
        by blank lines, or ``"No matching guest information found."`` when
        the query is blank or the retriever returns nothing.
    """
    # A blank query carries no search terms, so anything the retriever
    # returned for it would be unrelated to the request.
    if not query or not query.strip():
        return _NO_MATCH_MESSAGE

    results = bm25_retriever.invoke(query)
    # NOTE(review): BM25 retrievers typically return their top-ranked
    # documents even when no term matches, so this branch may rarely
    # trigger for a non-empty corpus -- confirm against the retriever docs.
    if not results:
        return _NO_MATCH_MESSAGE

    return "\n\n".join(doc.page_content for doc in results[:_MAX_RESULTS])


# Tool wrapper that exposes ``extract_text`` under the name
# "guest_info_retriever".
guest_info_tool = Tool(
    name="guest_info_retriever",
    func=extract_text,
    description="Retrieves detailed information about gala guests based on their name or relation.",
)