from typing import List, Optional, Tuple

from llama_index.core import Settings
from llama_index.core import SimpleDirectoryReader
from llama_index.core import SimpleDirectoryReader, VectorStoreIndex, SummaryIndex
from llama_index.core import SummaryIndex, VectorStoreIndex
from llama_index.core.node_parser import SentenceSplitter
from llama_index.core.query_engine.router_query_engine import RouterQueryEngine
from llama_index.core.selectors import LLMSingleSelector
from llama_index.core.tools import FunctionTool, QueryEngineTool
from llama_index.core.tools import QueryEngineTool
from llama_index.core.vector_stores import MetadataFilters, FilterCondition
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.llms.openai import OpenAI


def get_doc_tools(
    file_path: str,
    name: str,
) -> "Tuple[FunctionTool, QueryEngineTool]":
    """Get vector query and summary query tools from a document.

    The file is loaded, split with a ``SentenceSplitter`` (``chunk_size=1024``)
    and the resulting nodes are indexed twice: once in a ``VectorStoreIndex``
    (backing the vector tool) and once in a ``SummaryIndex`` (backing the
    summary tool).

    Args:
        file_path: Path of the single file to load.
        name: Suffix used to build the tool names ``vector_tool_{name}`` and
            ``summary_tool_{name}``; also interpolated into the summary
            tool's description.

    Returns:
        A ``(vector_query_tool, summary_tool)`` pair.
    """
    # Load the document and chunk it once; both indexes share the same nodes.
    documents = SimpleDirectoryReader(input_files=[file_path]).load_data()
    splitter = SentenceSplitter(chunk_size=1024)
    nodes = splitter.get_nodes_from_documents(documents)
    vector_index = VectorStoreIndex(nodes)

    # NOTE(review): no explicit description is passed to FunctionTool below, so
    # presumably this function's signature and docstring are what the agent
    # sees. Keep their wording stable; the `-> str` annotation is left as-is
    # for that reason even though the query engine's response object is
    # returned unchanged -- confirm before tightening it.
    def vector_query(
        query: str,
        page_numbers: Optional[List[str]] = None
    ) -> str:
        """Use to answer questions over a given paper.

        Useful if you have specific questions over the paper.
        Always leave page_numbers as None UNLESS there is a specific page you want to search for.

        Args:
            query (str): the string query to be embedded.
            page_numbers (Optional[List[str]]): Filter by set of pages. Leave as NONE
                if we want to perform a vector search
                over all pages. Otherwise, filter by the set of specified pages.

        """
        # None means "no page restriction": this yields an empty filter list.
        page_numbers = page_numbers or []
        metadata_dicts = [
            {"key": "page_label", "value": p} for p in page_numbers
        ]

        # OR condition: a node matching any one of the requested page labels
        # is eligible for retrieval.
        query_engine = vector_index.as_query_engine(
            similarity_top_k=2,
            filters=MetadataFilters.from_dicts(
                metadata_dicts,
                condition=FilterCondition.OR
            )
        )
        response = query_engine.query(query)
        return response

    vector_query_tool = FunctionTool.from_defaults(
        name=f"vector_tool_{name}",
        fn=vector_query
    )

    summary_index = SummaryIndex(nodes)
    summary_query_engine = summary_index.as_query_engine(
        response_mode="tree_summarize",
        use_async=True,
    )
    summary_tool = QueryEngineTool.from_defaults(
        name=f"summary_tool_{name}",
        query_engine=summary_query_engine,
        description=(
            f"Useful for summarization questions related to {name}"
        ),
    )

    return vector_query_tool, summary_tool