Spaces:
Paused
Paused
| ##################################################### | |
| ### DOCUMENT PROCESSOR [AGENT] | |
| ##################################################### | |
| ### Jonathan Wang | |
| # ABOUT: | |
| # This creates an app to chat with PDFs. | |
| # This is the AGENT | |
| # which handles complex questions about the PDF. | |
| ##################################################### | |
| ### TODO Board: | |
| # https://docs.llamaindex.ai/en/stable/examples/agent/agent_runner/agent_runner_rag_controllable/#setup-human-in-the-loop-chat | |
| # Investigate ObjectIndex and retrievers? https://docs.llamaindex.ai/en/stable/examples/agent/multi_document_agents/ | |
| # https://docs.llamaindex.ai/en/stable/module_guides/storing/chat_stores/ | |
| ##################################################### | |
| ### IMPORTS | |
| from typing import List | |
| from streamlit import session_state as ss | |
| from llama_index.core.settings import Settings | |
| from llama_index.core.tools import QueryEngineTool, ToolMetadata | |
| from llama_index.core.query_engine import SubQuestionQueryEngine | |
| # Own Modules | |
| from full_doc import FullDocument | |
| ##################################################### | |
| ### CODE | |
# Names accepted as `tool_name` by `_build_tool_from_fulldoc`; each one is also the
# FullDocument attribute that is looked up (via getattr) to get the query engine.
ALLOWED_DOCUMENT_TOOLS = ['engine', 'subquestion_engine']
# Alias of ALLOWED_DOCUMENT_TOOLS: this is the same list object, not a copy.
# NOTE(review): not referenced in the code visible here - presumably reserved for
# non-document tools to be added later; confirm before relying on it.
ALLOWED_TOOLS = ALLOWED_DOCUMENT_TOOLS
def _build_tool_from_fulldoc(fulldoc: FullDocument, tool_name: str) -> QueryEngineTool:
    """Given a Full Document, build a QueryEngineTool from the specified engine.

    Args:
        fulldoc (FullDocument): The FullDocument (doc + query engines).
        tool_name (str): The engine to use. Matched case-insensitively
            against ALLOWED_DOCUMENT_TOOLS.

    Returns:
        QueryEngineTool: A tool wrapping the requested query engine, named after
            the (lower-cased) engine and described with the document's one-line summary.

    Raises:
        ValueError: If `tool_name` is not an allowed document tool, or if the
            document does not have that engine (attribute missing or None).
    """
    # Normalise once so that validation, attribute lookup, the description
    # branch and the tool's metadata name all use the same spelling.
    tool_name = tool_name.lower()
    if tool_name not in ALLOWED_DOCUMENT_TOOLS:
        raise ValueError(f"`tool_name` must be one of {ALLOWED_DOCUMENT_TOOLS}")

    query_engine = getattr(fulldoc, tool_name, None)
    if query_engine is None:
        raise ValueError(f"`{tool_name}` must be created from the document first.")

    # Build Tool
    # The description stays empty for an allowed tool without a branch below.
    tool_description = ''
    if tool_name == 'engine':
        tool_description = 'A tool that answers simple questions about the following document:\n' + fulldoc.summary_oneline
    elif tool_name == 'subquestion_engine':
        tool_description = 'A tool that answers complex questions about the following document:\n' + fulldoc.summary_oneline

    return QueryEngineTool(
        query_engine=query_engine,
        metadata=ToolMetadata(
            name=tool_name,
            description=tool_description,
        ),
    )
def doclist_to_agent(doclist: List[FullDocument], fulldoc_tools_to_use: List[str]=['engine']) -> SubQuestionQueryEngine:  # ReActAgent:
    """Build a SubQuestionQueryEngine over the query engines of a list of documents.

    One tool is built per (tool name, document) pair, iterating tool names in
    the order given by the caller and, for each, the documents in `doclist` order.

    Args:
        doclist (List[FullDocument]): The documents whose engines become tools.
        fulldoc_tools_to_use (List[str]): Names of the document engines to expose.
            Names not in ALLOWED_DOCUMENT_TOOLS are skipped with a warning;
            duplicates are used only once. The default list is never mutated.

    Returns:
        SubQuestionQueryEngine: An engine built from all the document tools, using
            `Settings.llm`, or the Streamlit session's `llm` if that is falsy.

    Raises:
        ValueError: Propagated from `_build_tool_from_fulldoc` when a document
            does not have one of the requested engines.
    """
    import warnings  # local import so this function does not depend on file-level changes

    # Keep only the allowed tools: de-duplicated, in the caller's order.
    tools_to_use = [
        tool_name
        for tool_name in dict.fromkeys(fulldoc_tools_to_use)
        if tool_name in ALLOWED_DOCUMENT_TOOLS
    ]
    removed_tools = set(fulldoc_tools_to_use) - set(ALLOWED_DOCUMENT_TOOLS)
    if removed_tools:
        # Actually emit the warning (constructing `Warning(...)` alone does nothing).
        warnings.warn(
            f"Tools {removed_tools} are not in the allowed list of tools. Skipping...",
            stacklevel=2,
        )

    # Agent Tools
    # NOTE(review): every document contributes a tool with the same metadata name
    # (the engine name); confirm SubQuestionQueryEngine can tell them apart when
    # `doclist` holds more than one document.
    agent_tools = []
    for tool_name in tools_to_use:
        for doc in doclist:
            agent_tools.append(_build_tool_from_fulldoc(doc, tool_name))

    # Agent
    agent = SubQuestionQueryEngine.from_defaults(
        query_engine_tools=agent_tools,
        llm=Settings.llm or ss.llm,
        verbose=True,
    )
    return agent