blades-helper / retriever.py
Dylan
added retriever, more random tables
486fca4
from langchain.docstore.document import Document
from langchain.text_splitter import RecursiveCharacterTextSplitter
from smolagents import Tool
from langchain_community.retrievers import BM25Retriever
from smolagents import CodeAgent, HfApiModel
class BladesInTheDarkRetrievalTool(Tool):
    """Agent tool that retrieves Blades in the Dark setting text for a query.

    The supplied documents are indexed with a BM25 retriever when the tool is
    constructed. Calling the tool returns the text of the best-matching
    documents, formatted as numbered "ideas".
    """

    # NOTE(review): the retriever built in __init__ is BM25, which ranks by
    # keyword overlap, so "semantic search" in the description is loose
    # wording. The string is left unchanged because the agent reads it.
    name = "BladesInTheDarkRetrievalTool"
    description = "Uses semantic search to retrieve relevant setting details from Doskvol, the main city of Blades in the Dark, an RPG."
    inputs = {
        "query": {
            "type": "string",
            "description": "The query to perform. This should be a query for details on the setting.",
        }
    }
    output_type = "string"

    def __init__(self, docs, **kwargs):
        """Build the retriever over ``docs``.

        Args:
            docs: Documents to index; handed to
                ``BM25Retriever.from_documents``.
            **kwargs: Forwarded unchanged to ``Tool.__init__``.
        """
        super().__init__(**kwargs)
        # Retrieve the top 5 documents per query.
        self.retriever = BM25Retriever.from_documents(docs, k=5)

    def forward(self, query: str) -> str:
        """Return the retrieved documents for ``query`` as one string.

        Args:
            query: The search text passed to the retriever.

        Returns:
            A header line followed by one ``===== Idea i =====`` section per
            retrieved document, numbered from 0. Only the header is returned
            when the retriever yields no documents.

        Raises:
            TypeError: If ``query`` is not a string.
        """
        # Raise explicitly rather than assert: asserts are removed under
        # ``python -O``, which would let non-string queries through.
        if not isinstance(query, str):
            raise TypeError("Your search query must be a string")

        docs = self.retriever.invoke(query)
        sections = (
            f"\n\n===== Idea {i} =====\n" + doc.page_content
            for i, doc in enumerate(docs)
        )
        return "\nRetrieved ideas:\n" + "".join(sections)
def prepare_docs(
    file_path: str,
    *,
    encoding: str = "utf-8",
    chunk_size: int = 500,
    chunk_overlap: int = 50,
):
    """Load one text file and split it into overlapping chunks.

    Args:
        file_path: Path of the text file to read.
        encoding: Text encoding used to decode the file. Set explicitly so
            the result does not depend on the platform's default locale.
        chunk_size: Maximum chunk size passed to the splitter.
        chunk_overlap: Overlap between consecutive chunks passed to the
            splitter.

    Returns:
        The chunks produced by
        ``RecursiveCharacterTextSplitter.split_documents`` for the file.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file is not valid text in ``encoding``.
    """
    # just one file for now
    with open(file_path, "r", encoding=encoding) as f:
        source_docs = [Document(page_content=f.read())]

    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
        add_start_index=True,
        strip_whitespace=True,
        # Separators are listed from coarsest (blank line) to finest (any
        # character).
        separators=["\n\n", "\n", ".", " ", ""],
    )
    return splitter.split_documents(source_docs)