nikhmr1235 committed on
Commit
f22cccb
·
verified ·
1 Parent(s): c9454da

Update helper.py

Browse files
Files changed (1) hide show
  1. helper.py +58 -23
helper.py CHANGED
@@ -47,51 +47,86 @@ repl_tool = Tool(
47
  )
48
 
49
 
50
- def get_travily_api_search_tool(tavily_api_key: str) -> Tool:
 
 
 
 
 
 
 
 
 
 
 
51
  """
52
- Creates and returns a LangChain Tool for Tavily Search API.
 
53
 
54
  Args:
55
  tavily_api_key: The API key for Tavily Search.
 
 
 
 
 
 
 
 
56
 
57
  Returns:
58
  A LangChain Tool configured for Tavily Search.
59
  """
60
  tavily_search = TavilySearchResults(
61
- max_results=3, # Limits the number of search results to 3
62
- include_answer=True, # Prioritizes direct answers from search results
63
- include_raw_content=False, # Excludes full raw content of pages to save tokens
64
- include_images=False, # Excludes image results to save tokens
65
- tavily_api_key=tavily_api_key
 
 
 
 
 
 
 
 
 
 
 
66
  )
67
 
 
68
  return Tool(
69
  name="tavily_search",
70
  description="""
71
- A powerful search engine tool for real-time information retrieval from the web.
 
 
72
  Use this tool when you need to:
73
- - Find up-to-date information, facts, or statistics.
74
- - Research current events, people, or concepts.
75
  - Answer questions that require external knowledge not present in the model's training data.
76
- - Verify information or get factual answers.
77
 
78
  **Input Format (CRITICAL):**
79
- The input MUST be a concise, clear, and specific search query string.
80
- Think of what you would type into a Google search bar.
81
- Example: "current weather in London"
82
- Example: "population of India 2024"
83
- Example: "history of the internet"
 
84
 
85
  **DO NOT:**
86
- - Ask natural language questions that are not search queries.
87
- - Provide incomplete sentences or ambiguous terms.
88
- - Include personal information or sensitive data in your queries.
89
- - Expect direct execution of commands or calculations; this is a search tool.
90
 
91
  **Output:**
92
- The tool returns a JSON string containing relevant search results.
93
- It prioritizes a direct 'answer' if available. Otherwise, it provides a list of
94
- search results including titles, URLs, and snippets.
95
  """,
96
  func=tavily_search.run,
97
  )
 
47
  )
48
 
49
 
50
+ from langchain_community.tools import TavilySearchResults
51
+ from langchain.tools import Tool
52
+ from typing import List, Optional
53
+
54
+ def get_travily_api_search_tool(
55
+ tavily_api_key: str,
56
+ # Optional parameters for more fine-grained control
57
+ search_depth: str = "advanced",
58
+ include_domains: Optional[List[str]] = None,
59
+ exclude_domains: Optional[List[str]] = None,
60
+ time_range: Optional[str] = None
61
+ ) -> Tool:
62
  """
63
+ Creates and returns a LangChain Tool for Tavily Search API, optimized for GAIA-like
64
+ fact-based questions to improve consistency and accuracy.
65
 
66
  Args:
67
  tavily_api_key: The API key for Tavily Search.
68
+ search_depth: 'basic' or 'advanced'. 'advanced' can provide more comprehensive results.
69
+ Default: 'advanced' for better factual depth.
70
+ include_domains: Optional list of domains to specifically include in search results.
71
+ Useful for targeting highly reputable sources.
72
+ exclude_domains: Optional list of domains to specifically exclude from search results.
73
+ Useful for filtering out unreliable sources.
74
+ time_range: Optional time range for results (e.g., 'day', 'week', 'month', 'year').
75
+ Useful for stabilizing results for historical facts or focusing on recent data.
76
 
77
  Returns:
78
  A LangChain Tool configured for Tavily Search.
79
  """
80
  tavily_search = TavilySearchResults(
81
+ max_results=5, # Increased from 3 to 5 (or more) for a larger pool of data.
82
+ # This increases the chance of finding the correct answer even if rankings shift.
83
+ include_answer=False, # **CRITICAL CHANGE for GAIA L1/L2:**
84
+ # For precise factual answers, it's generally better to
85
+ # let YOUR agent's LLM synthesize the answer from raw snippets.
86
+ # Tavily's `include_answer` uses its own LLM which adds another
87
+ # layer of potential variability and might not always be perfectly aligned
88
+ # with GAIA's strict answer format.
89
+ include_raw_content=False, # Good for saving tokens, keep as is.
90
+ include_images=False, # Good for saving tokens, keep as is.
91
+ tavily_api_key=tavily_api_key,
92
+ # Incorporating new parameters for better control:
93
+ search_depth=search_depth, # Passed from function argument, default 'advanced'
94
+ include_domains=include_domains,
95
+ exclude_domains=exclude_domains,
96
+ time_range=time_range
97
  )
98
 
99
+ # Updated description to reflect changes and emphasize strict input/output for GAIA
100
  return Tool(
101
  name="tavily_search",
102
  description="""
103
+ A powerful and precise search engine tool for real-time, factual information retrieval from the web.
104
+ Optimized for fact-based questions (like GAIA Level 1 & 2).
105
+
106
  Use this tool when you need to:
107
+ - Find definitive, up-to-date facts, statistics, or direct answers.
108
+ - Research specific historical events, scientific data, or definitions.
109
  - Answer questions that require external knowledge not present in the model's training data.
110
+ - Verify information or get unambiguous factual answers.
111
 
112
  **Input Format (CRITICAL):**
113
+ The input MUST be a concise, clear, and highly specific search query string.
114
+ Formulate your query as if you're trying to get a single, factual answer from a search engine.
115
+ Example: "population of Tokyo 2023"
116
+ Example: "date of birth of Marie Curie"
117
+ Example: "chemical formula of water"
118
+ Example: "winner of 2022 FIFA World Cup"
119
 
120
  **DO NOT:**
121
+ - Ask natural language questions that are not optimized for search queries (e.g., "Tell me a story about...").
122
+ - Provide incomplete sentences, ambiguous terms, or conversational filler.
123
+ - Include personal information or sensitive data.
124
+ - Expect direct execution of commands or calculations; this is solely a search and information retrieval tool.
125
 
126
  **Output:**
127
+ The tool returns a JSON string containing relevant search results, primarily focused on providing
128
+ snippets and URLs to help you extract the precise factual answer.
129
+ It does NOT provide a direct generated answer but raw search results for your interpretation.
130
  """,
131
  func=tavily_search.run,
132
  )