nikhmr1235 committed on
Commit
d980140
·
verified ·
1 Parent(s): 742b076

wikipedia_search_tool

Browse files
Files changed (1) hide show
  1. helper.py +57 -1
helper.py CHANGED
@@ -266,4 +266,60 @@ audio_transcriber_tool = Tool(
266
  "Returns the transcribed text or an error message if transcription fails."
267
  ),
268
  func=transcribe_audio_from_path_or_url,
269
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
266
  "Returns the transcribed text or an error message if transcription fails."
267
  ),
268
  func=transcribe_audio_from_path_or_url,
269
+ )
270
+
271
+ from langchain_community.tools import WikipediaQueryRun
272
+ from langchain_community.utilities import WikipediaAPIWrapper
273
+ from langchain.tools import Tool # Ensure Tool is imported
274
+
275
+ def get_wikipedia_tool() -> Tool:
276
+ """
277
+ Creates and returns a LangChain Tool for querying Wikipedia.
278
+ """
279
+ # Build the Wikipedia API wrapper that the query tool below delegates to.
280
+ # The two settings passed to it bound how much content comes back from a query.
281
+ # 'top_k_results': Number of search results to return.
282
+ # 'doc_content_chars_max': Maximum number of characters of page content returned.
283
+ # Keeping this to a moderate limit (e.g., 2000-4000)
284
+ # matters for token management, as Wikipedia articles can be very long.
285
+ # Adjust based on your LLM's context window and typical use cases.
286
+ wikipedia_api_wrapper = WikipediaAPIWrapper(
287
+ top_k_results=3, # Return up to 3 search results
288
+ doc_content_chars_max=2000 # Cap content at 2000 chars (NOTE(review): cap may apply to the combined output rather than per article; confirm against WikipediaAPIWrapper)
289
+ )
290
+
291
+ # Wrap the configured API wrapper in the Wikipedia query tool; its .run method is passed as the Tool's func below
292
+ wikipedia_query_tool = WikipediaQueryRun(api_wrapper=wikipedia_api_wrapper)
293
+
294
+ return Tool(
295
+ name="wikipedia_search_tool",
296
+ description="""
297
+ A specialized search tool for retrieving information from Wikipedia.
298
+ Use this tool when you need:
299
+ - Authoritative and well-established facts.
300
+ - Historical information, biographies, or scientific explanations.
301
+ - General knowledge about specific concepts, people, places, or events.
302
+ - Background information that is unlikely to be very recent.
303
+
304
+ **Input Format (CRITICAL):**
305
+ The input MUST be a concise and clear query string representing the topic you want to search on Wikipedia.
306
+ Think of a noun phrase or a short question that directly names the subject.
307
+ Example: "Albert Einstein"
308
+ Example: "Battle of Gettysburg"
309
+ Example: "photosynthesis process"
310
+
311
+ **DO NOT:**
312
+ - Ask natural language questions that are not search queries.
313
+ - Seek real-time information (e.g., current news, today's weather). For real-time data, use 'tavily_search'.
314
+ - Provide incomplete sentences or ambiguous terms.
315
+ - Expect this tool to perform calculations or access external websites beyond Wikipedia.
316
+
317
+ **Output:**
318
+ The tool returns a string containing snippets of relevant Wikipedia articles.
319
+ The output is limited in length to save tokens. If the answer is not found in the snippet,
320
+ you might need to refine your query or consider using a broader search tool like 'tavily_search'.
321
+ """,
322
+ func=wikipedia_query_tool.run,
323
+ )
324
+
325
+ wikipedia_search_tool = get_wikipedia_tool() # Module-level tool instance, built once when this module is imported