# NOTE(review): removed non-code extraction residue ("Spaces: Runtime error"
# page-header text from the hosting page) so the module parses as Python.
from typing import List, Optional, Tuple

from llama_index.core import (
    Settings,
    SimpleDirectoryReader,
    SummaryIndex,
    VectorStoreIndex,
)
from llama_index.core.node_parser import SentenceSplitter
from llama_index.core.query_engine.router_query_engine import RouterQueryEngine
from llama_index.core.selectors import LLMSingleSelector
from llama_index.core.tools import FunctionTool, QueryEngineTool
from llama_index.core.vector_stores import FilterCondition, MetadataFilters
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.llms.openai import OpenAI
def get_doc_tools(
    file_path: str,
    name: str,
) -> Tuple[FunctionTool, QueryEngineTool]:
    """Build a vector-search tool and a summary tool over one document.

    The document is loaded, split into 1024-token chunks, and indexed twice:
    a ``VectorStoreIndex`` for targeted retrieval and a ``SummaryIndex`` for
    whole-document summarization.

    Args:
        file_path (str): Path to the document to index.
        name (str): Identifier suffixed onto the tool names
            (``vector_tool_{name}``, ``summary_tool_{name}``) so tools for
            multiple documents stay distinct.

    Returns:
        Tuple[FunctionTool, QueryEngineTool]: ``(vector_query_tool, summary_tool)``.
    """
    # Load the document and split it into fixed-size chunks for indexing.
    documents = SimpleDirectoryReader(input_files=[file_path]).load_data()
    splitter = SentenceSplitter(chunk_size=1024)
    nodes = splitter.get_nodes_from_documents(documents)
    vector_index = VectorStoreIndex(nodes)

    def vector_query(
        query: str,
        page_numbers: Optional[List[str]] = None,
    ) -> str:
        """Use to answer questions over a given paper.

        Useful if you have specific questions over the paper.
        Always leave page_numbers as None UNLESS there is a specific page you want to search for.

        Args:
            query (str): the string query to be embedded.
            page_numbers (Optional[List[str]]): Filter by set of pages. Leave as NONE
                if we want to perform a vector search
                over all pages. Otherwise, filter by the set of specified pages.
        """
        page_numbers = page_numbers or []
        metadata_dicts = [
            {"key": "page_label", "value": p} for p in page_numbers
        ]
        query_engine = vector_index.as_query_engine(
            similarity_top_k=2,
            # OR-combine the page filters: a chunk matches if it comes from
            # ANY of the requested pages (no pages -> no restriction).
            filters=MetadataFilters.from_dicts(
                metadata_dicts,
                condition=FilterCondition.OR,
            ),
        )
        response = query_engine.query(query)
        # Stringify so the return honors the declared -> str contract
        # (query() yields a Response object, not a plain string).
        return str(response)

    # FunctionTool derives its LLM-facing description from the docstring above.
    vector_query_tool = FunctionTool.from_defaults(
        name=f"vector_tool_{name}",
        fn=vector_query,
    )

    summary_index = SummaryIndex(nodes)
    summary_query_engine = summary_index.as_query_engine(
        response_mode="tree_summarize",
        use_async=True,
    )
    summary_tool = QueryEngineTool.from_defaults(
        name=f"summary_tool_{name}",
        query_engine=summary_query_engine,
        description=(
            f"Useful for summarization questions related to {name}"
        ),
    )

    return vector_query_tool, summary_tool