Spaces:
Configuration error
Configuration error
oremaz
committed on
Commit
·
ffafa4c
1
Parent(s):
cac7820
Update agent.py
Browse files
agent.py
CHANGED
|
@@ -159,17 +159,34 @@ read_and_parse_tool = FunctionTool.from_defaults(
|
|
| 159 |
)
|
| 160 |
)
|
| 161 |
|
| 162 |
-
def create_rag_tool_fn(documents: List[Document]) -> QueryEngineTool:
|
| 163 |
"""
|
| 164 |
-
Creates a RAG query engine tool from
|
| 165 |
-
|
| 166 |
-
|
|
|
|
|
|
|
| 167 |
Args:
|
| 168 |
-
documents: A list of LlamaIndex Document objects from
|
| 169 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 170 |
Returns:
|
| 171 |
-
|
| 172 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 173 |
if not documents:
|
| 174 |
return None
|
| 175 |
|
|
@@ -215,6 +232,10 @@ def create_rag_tool_fn(documents: List[Document]) -> QueryEngineTool:
|
|
| 215 |
"The input is a natural language question about the documents' content."
|
| 216 |
)
|
| 217 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 218 |
|
| 219 |
return rag_engine_tool
|
| 220 |
|
|
@@ -222,13 +243,14 @@ create_rag_tool = FunctionTool.from_defaults(
|
|
| 222 |
fn=create_rag_tool_fn,
|
| 223 |
name="create_rag_tool",
|
| 224 |
description=(
|
| 225 |
-
"Use this tool to
|
| 226 |
-
"Input
|
| 227 |
-
"
|
| 228 |
-
"
|
|
|
|
|
|
|
| 229 |
)
|
| 230 |
)
|
| 231 |
-
|
| 232 |
# 1. Create the base DuckDuckGo search tool from the official spec.
|
| 233 |
# This tool returns text summaries of search results, not just URLs.
|
| 234 |
base_duckduckgo_tool = DuckDuckGoSearchToolSpec().to_tool_list()[1]
|
|
@@ -295,15 +317,16 @@ def create_forced_rag_pipeline():
|
|
| 295 |
forced_rag_pipeline = create_forced_rag_pipeline()
|
| 296 |
|
| 297 |
# Remplacer les tools individuels par le pipeline
|
| 298 |
-
|
| 299 |
fn=lambda input_path: forced_rag_pipeline.run(input_path),
|
| 300 |
name="process_docs_urls_tool",
|
| 301 |
description=(
|
| 302 |
-
"This is the PRIMARY and
|
| 303 |
-
"It AUTOMATICALLY processes
|
| 304 |
-
"
|
| 305 |
-
"
|
| 306 |
-
"
|
|
|
|
| 307 |
)
|
| 308 |
)
|
| 309 |
|
|
|
|
| 159 |
)
|
| 160 |
)
|
| 161 |
|
| 162 |
+
def create_rag_tool_fn(documents: List[Document], query: str = None) -> Union[QueryEngineTool, str]:
|
| 163 |
"""
|
| 164 |
+
Creates a RAG query engine tool from documents with advanced indexing and querying capabilities.
|
| 165 |
+
|
| 166 |
+
This function implements a sophisticated RAG pipeline using hierarchical or sentence-window parsing
|
| 167 |
+
depending on document count, vector indexing, and reranking for optimal information retrieval.
|
| 168 |
+
|
| 169 |
Args:
|
| 170 |
+
documents (List[Document]): A list of LlamaIndex Document objects from read_and_parse_tool.
|
| 171 |
+
Must not be empty to create a valid RAG engine.
|
| 172 |
+
query (str, optional): If provided, immediately queries the created RAG engine and returns
|
| 173 |
+
the answer as a string. If None, returns the QueryEngineTool for later use.
|
| 174 |
+
Defaults to None.
|
| 175 |
+
|
| 176 |
Returns:
|
| 177 |
+
Union[QueryEngineTool, str]:
|
| 178 |
+
- QueryEngineTool: When query=None, returns a tool configured for agent use with
|
| 179 |
+
advanced reranking and similarity search capabilities.
|
| 180 |
+
- str: When query is provided, returns the direct answer from the RAG engine.
|
| 181 |
+
- None: When documents list is empty.
|
| 182 |
+
|
| 183 |
+
Examples:
|
| 184 |
+
Create a RAG tool for later use:
|
| 185 |
+
>>> rag_tool = create_rag_tool_fn(documents)
|
| 186 |
+
|
| 187 |
+
Get immediate answer from documents:
|
| 188 |
+
>>> answer = create_rag_tool_fn(documents, query="What is the main topic?")
|
| 189 |
+
"""
|
| 190 |
if not documents:
|
| 191 |
return None
|
| 192 |
|
|
|
|
| 232 |
"The input is a natural language question about the documents' content."
|
| 233 |
)
|
| 234 |
)
|
| 235 |
+
|
| 236 |
+
if query :
|
| 237 |
+
result = rag_engine_tool.query_engine.query(query)
|
| 238 |
+
return str(result)
|
| 239 |
|
| 240 |
return rag_engine_tool
|
| 241 |
|
|
|
|
| 243 |
fn=create_rag_tool_fn,
|
| 244 |
name="create_rag_tool",
|
| 245 |
description=(
|
| 246 |
+
"Use this tool to build a Retrieval Augmented Generation (RAG) engine from documents AND optionally query it immediately. "
|
| 247 |
+
"Input: documents (list of documents or paths) and optional query parameter. "
|
| 248 |
+
"If no query is provided: creates and returns a RAG query engine tool for later use. "
|
| 249 |
+
"If query is provided: creates the RAG engine AND immediately returns the answer to your question. "
|
| 250 |
+
"This dual-mode tool enables both RAG engine creation and direct question-answering in one step. "
|
| 251 |
+
"Use with query parameter when you want immediate answers from documents, or without query to create a reusable engine."
|
| 252 |
)
|
| 253 |
)
|
|
|
|
| 254 |
# 1. Create the base DuckDuckGo search tool from the official spec.
|
| 255 |
# This tool returns text summaries of search results, not just URLs.
|
| 256 |
base_duckduckgo_tool = DuckDuckGoSearchToolSpec().to_tool_list()[1]
|
|
|
|
| 317 |
forced_rag_pipeline = create_forced_rag_pipeline()
|
| 318 |
|
| 319 |
# Remplacer les tools individuels par le pipeline
|
| 320 |
+
information_retrieval_tool = FunctionTool.from_defaults(
|
| 321 |
fn=lambda input_path: forced_rag_pipeline.run(input_path),
|
| 322 |
name="process_docs_urls_tool",
|
| 323 |
description=(
|
| 324 |
+
"This tool is the PRIMARY and MOST EFFECTIVE method for extracting and retrieving information from URLs or documents. "
|
| 325 |
+
"It AUTOMATICALLY processes any given web pages, PDFs, or document files by first using read_and_parse to fully extract and parse content. "
|
| 326 |
+
"Then, it creates a powerful Retrieval Augmented Generation (RAG) query engine optimized for semantic search and precise information retrieval. "
|
| 327 |
+
"Finally, it applies the RAG engine to answer queries directly, providing efficient and accurate results. "
|
| 328 |
+
"This tool is specifically designed to handle diverse document types and web content, ensuring superior extraction and querying capabilities. "
|
| 329 |
+
"Avoid manual page access or ad-hoc parsing; always use this tool for best performance and reliability in information extraction and question answering."
|
| 330 |
)
|
| 331 |
)
|
| 332 |
|