Spaces:
Build error
Build error
ernani
committed on
Commit
·
24b20b9
1
Parent(s):
14e6771
improving search and answers
Browse files
- manage_agents.py +65 -33
- tools.py +1 -3
manage_agents.py
CHANGED
|
@@ -50,9 +50,12 @@ class ContentTypeAgent:
|
|
| 50 |
- python: If the question refers to a Python file or contains a task ID for Python
|
| 51 |
|
| 52 |
Consider these special cases:
|
| 53 |
-
1. If the question
|
| 54 |
-
|
| 55 |
-
|
|
|
|
|
|
|
|
|
|
| 56 |
Return only the type and nothing else."""
|
| 57 |
)
|
| 58 |
|
|
@@ -238,6 +241,8 @@ class ContentTranslateAgent:
|
|
| 238 |
2. If it requires specialized knowledge or real-time data, respond with 'TOOLS_REQUIRED'
|
| 239 |
3. For simple questions like "what is 2+2?" or "reverse this text", provide the answer directly
|
| 240 |
4. For questions about specific content, data files, or URLs, respond with 'TOOLS_REQUIRED'
|
|
|
|
|
|
|
| 241 |
|
| 242 |
When answering, provide ONLY the precise answer requested.
|
| 243 |
Do not include explanations, steps, reasoning, or additional text.
|
|
@@ -263,7 +268,7 @@ class MainAgent:
|
|
| 263 |
"""Main agent orchestrating the workflow"""
|
| 264 |
|
| 265 |
def __init__(self):
|
| 266 |
-
self.llm = ChatOpenAI(temperature=0, model="gpt-4o")
|
| 267 |
|
| 268 |
# Initialize tools
|
| 269 |
self.wikipedia_tool = WikipediaTool()
|
|
@@ -301,29 +306,30 @@ class MainAgent:
|
|
| 301 |
prompt = f"""You are an expert in transforming user questions into clear, specific, and search-optimized queries.
|
| 302 |
|
| 303 |
Rewrite the following question with the following goals:
|
| 304 |
-
- Add any necessary missing context
|
| 305 |
-
- Make the question as specific as possible for retrieval by a search engine or knowledge base
|
| 306 |
-
-
|
| 307 |
-
-
|
| 308 |
-
- Ensure the wording is grammatically correct, concise, and ready to use for search.
|
| 309 |
-
- Remove any unnecessary whitespace, special characters, or ambiguity.
|
| 310 |
|
| 311 |
Question: {question}
|
| 312 |
|
| 313 |
Example:
|
| 314 |
-
Question:
|
| 315 |
-
|
| 316 |
|
| 317 |
-
Question: Who
|
| 318 |
-
|
| 319 |
-
Rewritten: what are the baseball players and their respective numbers before and after Taishō Tamai's number as of July 2023?
|
| 320 |
|
|
|
|
|
|
|
| 321 |
|
| 322 |
-
Return only the rewritten
|
| 323 |
-
|
| 324 |
"""
|
| 325 |
response = self.llm.invoke(prompt)
|
| 326 |
-
|
|
|
|
|
|
|
| 327 |
|
| 328 |
|
| 329 |
def _get_answer_using_tools(self, question: str) -> str:
|
|
@@ -528,7 +534,16 @@ class MainAgent:
|
|
| 528 |
# Handle YouTube URLs in general questions
|
| 529 |
if content_type == "youtube":
|
| 530 |
self.last_used_tool = "youtube"
|
| 531 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 532 |
result = self.youtube_tool._run(youtube_url, question=question)
|
| 533 |
|
| 534 |
# Use specialized prompt for YouTube analysis
|
|
@@ -560,16 +575,16 @@ class MainAgent:
|
|
| 560 |
# Check for Wikipedia specific questions first
|
| 561 |
if "wikipedia" in question_lower:
|
| 562 |
self.last_used_tool = "wiki"
|
| 563 |
-
|
| 564 |
-
wiki_result = self.wikipedia_tool._run(
|
| 565 |
answer = self._generate_answer_from_context(question, wiki_result)
|
| 566 |
if self._is_valid_answer(answer):
|
| 567 |
return answer
|
| 568 |
|
| 569 |
-
# Use general web search
|
| 570 |
self.last_used_tool = "web"
|
| 571 |
-
|
| 572 |
-
web_result = self.web_search_tool._run(
|
| 573 |
answer = self._generate_answer_from_context(question, web_result)
|
| 574 |
|
| 575 |
if self._is_valid_answer(answer):
|
|
@@ -578,8 +593,8 @@ class MainAgent:
|
|
| 578 |
# If no good answer from web search, try with Wikipedia as a last resource
|
| 579 |
if "wikipedia" not in question_lower: # Only if not already tried
|
| 580 |
self.last_used_tool = "wiki"
|
| 581 |
-
|
| 582 |
-
wiki_result = self.wikipedia_tool._run(
|
| 583 |
answer = self._generate_answer_from_context(question, wiki_result)
|
| 584 |
if self._is_valid_answer(answer):
|
| 585 |
return answer
|
|
@@ -602,17 +617,31 @@ class MainAgent:
|
|
| 602 |
Information: {context}
|
| 603 |
|
| 604 |
Instructions:
|
| 605 |
-
1.
|
| 606 |
-
2.
|
| 607 |
-
3.
|
| 608 |
-
4.
|
| 609 |
-
5.
|
| 610 |
-
6.
|
|
|
|
|
|
|
| 611 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 612 |
When answering, provide ONLY the precise answer requested.
|
| 613 |
Do not include explanations, steps, reasoning, or additional text.
|
| 614 |
Be direct and specific. GAIA benchmark requires exact matching answers.
|
| 615 |
-
For example, if asked "What is the color of the sky?", respond simply with "blue".
|
| 616 |
"""
|
| 617 |
|
| 618 |
try:
|
|
@@ -620,6 +649,9 @@ class MainAgent:
|
|
| 620 |
response = self.llm.invoke(answer_prompt)
|
| 621 |
answer = response.content if hasattr(response, 'content') else str(response)
|
| 622 |
|
|
|
|
|
|
|
|
|
|
| 623 |
return answer
|
| 624 |
except Exception as e:
|
| 625 |
return f"Could not generate an answer due to an error: {str(e)}"
|
|
|
|
| 50 |
- python: If the question refers to a Python file or contains a task ID for Python
|
| 51 |
|
| 52 |
Consider these special cases:
|
| 53 |
+
1. If the question asks to search in Wikipedia, use "wiki"
|
| 54 |
+
|
| 55 |
+
If you need any additional knowledge, use wikipedia to retrieve it.
|
| 56 |
+
For example, math formulas, historical dates, etc.
|
| 57 |
+
If you need to search the web for additional knowledge, use the web tool.
|
| 58 |
+
|
| 59 |
Return only the type and nothing else."""
|
| 60 |
)
|
| 61 |
|
|
|
|
| 241 |
2. If it requires specialized knowledge or real-time data, respond with 'TOOLS_REQUIRED'
|
| 242 |
3. For simple questions like "what is 2+2?" or "reverse this text", provide the answer directly
|
| 243 |
4. For questions about specific content, data files, or URLs, respond with 'TOOLS_REQUIRED'
|
| 244 |
+
5. For example, for math questions, you need to use the web tool or wikipedia to retrieve the formula.
|
| 245 |
+
6. Analyze the context and what the question is asking for, before answering to it.
|
| 246 |
|
| 247 |
When answering, provide ONLY the precise answer requested.
|
| 248 |
Do not include explanations, steps, reasoning, or additional text.
|
|
|
|
| 268 |
"""Main agent orchestrating the workflow"""
|
| 269 |
|
| 270 |
def __init__(self):
|
| 271 |
+
self.llm = ChatOpenAI(temperature=0, model="gpt-4o-mini")
|
| 272 |
|
| 273 |
# Initialize tools
|
| 274 |
self.wikipedia_tool = WikipediaTool()
|
|
|
|
| 306 |
prompt = f"""You are an expert in transforming user questions into clear, specific, and search-optimized queries.
|
| 307 |
|
| 308 |
Rewrite the following question with the following goals:
|
| 309 |
+
- Add any necessary missing context to make it fully unambiguous
|
| 310 |
+
- Make the question as specific as possible for retrieval by a search engine or knowledge base
|
| 311 |
+
- Ensure the query is effective for retrieving the exact information needed
|
| 312 |
+
- Query should use the context and not the entire question
|
|
|
|
|
|
|
| 313 |
|
| 314 |
Question: {question}
|
| 315 |
|
| 316 |
Example:
|
| 317 |
+
Question: How many studio albums were published by Mercedes Sosa between 2000 and 2009 (included)? You can use the latest 2022 version of english wikipedia.
|
| 318 |
+
Query: Mercedes Sosa musician
|
| 319 |
|
| 320 |
+
Question: Who did the actor who played Ray in the Polish-language version of Everybody Loves Raymond play in Magda M.? Give only the first name.
|
| 321 |
+
Query: "Everybody Loves Raymond" actor Polish version Magda M.
|
|
|
|
| 322 |
|
| 323 |
+
Question: Who are the pitchers with the number before and after Taishō Tamai's number as of July 2023? Give them to me in the form Pitcher Before, Pitcher After, use their last names only, in Roman characters.
|
| 324 |
+
Query: Taishō Tamai baseball player
|
| 325 |
|
| 326 |
+
Return only the rewritten query, no extra commentary.
|
| 327 |
+
The query should be highly optimized for retrieving the exact information needed.
|
| 328 |
"""
|
| 329 |
response = self.llm.invoke(prompt)
|
| 330 |
+
formatted_query = response.content if hasattr(response, 'content') else str(response)
|
| 331 |
+
|
| 332 |
+
return formatted_query
|
| 333 |
|
| 334 |
|
| 335 |
def _get_answer_using_tools(self, question: str) -> str:
|
|
|
|
| 534 |
# Handle YouTube URLs in general questions
|
| 535 |
if content_type == "youtube":
|
| 536 |
self.last_used_tool = "youtube"
|
| 537 |
+
# Extract YouTube URL properly
|
| 538 |
+
if parameter.startswith("http"):
|
| 539 |
+
youtube_url = parameter
|
| 540 |
+
else:
|
| 541 |
+
# Try to extract URL from question if parameter doesn't have one
|
| 542 |
+
youtube_url = self._extract_youtube_url(question)
|
| 543 |
+
if not youtube_url:
|
| 544 |
+
# If no URL found, we can't process the YouTube video
|
| 545 |
+
return "Error: No valid YouTube URL found in the question."
|
| 546 |
+
|
| 547 |
result = self.youtube_tool._run(youtube_url, question=question)
|
| 548 |
|
| 549 |
# Use specialized prompt for YouTube analysis
|
|
|
|
| 575 |
# Check for Wikipedia specific questions first
|
| 576 |
if "wikipedia" in question_lower:
|
| 577 |
self.last_used_tool = "wiki"
|
| 578 |
+
wiki_query = self._format_question(question)
|
| 579 |
+
wiki_result = self.wikipedia_tool._run(wiki_query)
|
| 580 |
answer = self._generate_answer_from_context(question, wiki_result)
|
| 581 |
if self._is_valid_answer(answer):
|
| 582 |
return answer
|
| 583 |
|
| 584 |
+
# Use general web search with improved query formatting
|
| 585 |
self.last_used_tool = "web"
|
| 586 |
+
web_query = self._format_question(question)
|
| 587 |
+
web_result = self.web_search_tool._run(web_query)
|
| 588 |
answer = self._generate_answer_from_context(question, web_result)
|
| 589 |
|
| 590 |
if self._is_valid_answer(answer):
|
|
|
|
| 593 |
# If no good answer from web search, try with Wikipedia as a last resource
|
| 594 |
if "wikipedia" not in question_lower: # Only if not already tried
|
| 595 |
self.last_used_tool = "wiki"
|
| 596 |
+
wiki_query = self._format_question(question)
|
| 597 |
+
wiki_result = self.wikipedia_tool._run(wiki_query)
|
| 598 |
answer = self._generate_answer_from_context(question, wiki_result)
|
| 599 |
if self._is_valid_answer(answer):
|
| 600 |
return answer
|
|
|
|
| 617 |
Information: {context}
|
| 618 |
|
| 619 |
Instructions:
|
| 620 |
+
1. Read the question carefully and identify exactly what is being asked for
|
| 621 |
+
2. Pay close attention to any formatting requirements in the question (e.g., "give only the city name", "without abbreviations", etc.)
|
| 622 |
+
3. Find the specific information in the context that directly answers the question
|
| 623 |
+
4. Format your answer exactly as requested - if asked for just a name, number, or code, provide only that
|
| 624 |
+
5. For numerical answers, double-check your calculation or counting
|
| 625 |
+
6. For names or places, ensure correct and complete spelling
|
| 626 |
+
7. If asked for a specific format like "comma-separated list" or "alphabetical order", follow that exactly
|
| 627 |
+
8. If asked for just a specific piece of information, do not include any other details
|
| 628 |
|
| 629 |
+
Example:
|
| 630 |
+
Question: what's the capital of france?
|
| 631 |
+
Answer: Paris
|
| 632 |
+
|
| 633 |
+
If your question asks for without abreviations:
|
| 634 |
+
city name: st. petersburg
|
| 635 |
+
Answer: Saint Petersburg
|
| 636 |
+
|
| 637 |
+
If your answer is a number, provide only the number.
|
| 638 |
+
Example:
|
| 639 |
+
Question: how many wheels does a car have?
|
| 640 |
+
Answer: 4
|
| 641 |
+
|
| 642 |
When answering, provide ONLY the precise answer requested.
|
| 643 |
Do not include explanations, steps, reasoning, or additional text.
|
| 644 |
Be direct and specific. GAIA benchmark requires exact matching answers.
|
|
|
|
| 645 |
"""
|
| 646 |
|
| 647 |
try:
|
|
|
|
| 649 |
response = self.llm.invoke(answer_prompt)
|
| 650 |
answer = response.content if hasattr(response, 'content') else str(response)
|
| 651 |
|
| 652 |
+
# Post-process to ensure answer meets formatting requirements
|
| 653 |
+
answer = answer.strip()
|
| 654 |
+
|
| 655 |
return answer
|
| 656 |
except Exception as e:
|
| 657 |
return f"Could not generate an answer due to an error: {str(e)}"
|
tools.py
CHANGED
|
@@ -93,7 +93,7 @@ class WikipediaTool(BaseTool):
|
|
| 93 |
name: str = "wikipedia"
|
| 94 |
description: str = "Search for information on Wikipedia. Useful for finding facts about people, places, events, concepts, etc."
|
| 95 |
wikipedia_tool: WikipediaQueryRun = Field(default_factory=lambda: WikipediaQueryRun(
|
| 96 |
-
api_wrapper=WikipediaAPIWrapper(top_k_results=
|
| 97 |
))
|
| 98 |
|
| 99 |
def _run(self, question: str) -> str:
|
|
@@ -154,8 +154,6 @@ class YouTubeVideoTool(BaseContentTool):
|
|
| 154 |
for entry in transcript_list
|
| 155 |
])
|
| 156 |
|
| 157 |
-
print(f"Transcript text: {transcript_text}")
|
| 158 |
-
|
| 159 |
return transcript_text
|
| 160 |
|
| 161 |
except Exception as e:
|
|
|
|
| 93 |
name: str = "wikipedia"
|
| 94 |
description: str = "Search for information on Wikipedia. Useful for finding facts about people, places, events, concepts, etc."
|
| 95 |
wikipedia_tool: WikipediaQueryRun = Field(default_factory=lambda: WikipediaQueryRun(
|
| 96 |
+
api_wrapper=WikipediaAPIWrapper(top_k_results=5)
|
| 97 |
))
|
| 98 |
|
| 99 |
def _run(self, question: str) -> str:
|
|
|
|
| 154 |
for entry in transcript_list
|
| 155 |
])
|
| 156 |
|
|
|
|
|
|
|
| 157 |
return transcript_text
|
| 158 |
|
| 159 |
except Exception as e:
|