nikhmr1235 committed on
Commit
b2fb17e
·
verified ·
1 Parent(s): d116ff2

Update helper.py

Browse files
Files changed (1) hide show
  1. helper.py +25 -94
helper.py CHANGED
@@ -1,6 +1,8 @@
1
  from langchain_experimental.utilities import PythonREPL
2
  from langchain.tools import Tool
3
  from langchain_community.tools import TavilySearchResults
 
 
4
  import re
5
 
6
  # For newer LangChain versions, sometimes it's directly from langchain.tools.python
@@ -234,100 +236,6 @@ from langchain_community.utilities import WikipediaAPIWrapper
234
  from langchain.tools import Tool # Ensure Tool is imported
235
 
236
 
237
- def get_wikipedia_tool() -> Tool:
238
- """
239
- Creates and returns a LangChain Tool for querying Wikipedia.
240
- """
241
- wikipedia_api_wrapper = WikipediaAPIWrapper(
242
- top_k_results=3,
243
- doc_content_chars_max=4000
244
- )
245
-
246
- wikipedia_query_tool = WikipediaQueryRun(api_wrapper=wikipedia_api_wrapper)
247
-
248
- return Tool(
249
- name="wikipedia_search_tool",
250
- description="""
251
- A specialized search tool for retrieving information from Wikipedia.
252
- Use this tool when you need:
253
- - Authoritative and well-established facts.
254
- - Historical information, biographies, or scientific explanations.
255
- - General knowledge about specific concepts, people, places, or events.
256
- - Background information that is unlikely to be very recent.
257
-
258
- **Input Format (CRITICAL):**
259
- The input MUST be a concise and clear query string representing the topic you want to search on Wikipedia.
260
- Think of a noun phrase or a short question that directly names the subject.
261
- Example: "Albert Einstein"
262
- Example: "Battle of Gettysburg"
263
- Example: "photosynthesis process"
264
-
265
- **DO NOT:**
266
- - Ask natural language questions that are not search queries.
267
- - Seek real-time information (e.g., current news, today's weather). For real-time data, use 'tavily_search'.
268
- - Provide incomplete sentences or ambiguous terms.
269
- - Expect this tool to perform calculations or access external websites beyond Wikipedia.
270
-
271
- **Output:**
272
- The tool returns a string containing snippets of relevant Wikipedia articles.
273
- The output is limited in length to save tokens. If the answer is not found in the snippet,
274
- you might need to refine your query or **consider using 'tavily_search' or other available tools for a broader search.**
275
- """,
276
- func=wikipedia_query_tool.run,
277
- )
278
-
279
- wikipedia_search_tool = get_wikipedia_tool()
280
-
281
- import wikipedia
282
-
283
- def wikipedia_full_content(query: str) -> str:
284
- """
285
- Fetches the full content of the top Wikipedia article for a query.
286
- If a section is specified in the query (e.g., "Mercedes Sosa Discography"),
287
- it tries to extract that section.
288
- """
289
- try:
290
- # Try to split query into page and section
291
- if " section:" in query:
292
- page_query, section = query.split(" section:", 1)
293
- else:
294
- page_query, section = query, None
295
-
296
- results = wikipedia.search(page_query)
297
- if not results:
298
- return "No Wikipedia article found for your query."
299
- page = wikipedia.page(results[0])
300
- content = page.content
301
-
302
- # If a section is specified, try to extract it
303
- if section:
304
- import re
305
- # Simple regex to extract section
306
- pattern = rf"==+\s*{re.escape(section.strip())}\s*==+(.*?)(==+|$)"
307
- match = re.search(pattern, content, re.DOTALL | re.IGNORECASE)
308
- if match:
309
- section_content = match.group(1).strip()
310
- return section_content[:2000] # Limit for token safety
311
- else:
312
- return f"Section '{section}' not found. Returning start of article:\n\n{content[:2000]}"
313
- else:
314
- return content[:2000] # Limit for token safety
315
-
316
- except Exception as e:
317
- return f"Wikipedia tool error: {e}"
318
-
319
- wikipedia_full_content_tool = Tool(
320
- name="wikipedia_full_content_tool",
321
- description="""
322
- Fetches the full content (or a specific section) of a Wikipedia article for a given query.
323
- Use this tool for questions about nominations, discographies, lists, or when you need more than a summary.
324
- To get a section, use the format: "Page Title section: Section Name"
325
- Example: "Mercedes Sosa section: Discography"
326
- Example: "Dinosaur featured articles section: Featured article nominations"
327
- """,
328
- func=wikipedia_full_content,
329
- )
330
-
331
  import os
332
  from serpapi import GoogleSearch # Or use SerpApiClient for other engines
333
  from typing import Dict, Any
@@ -539,4 +447,27 @@ gemini_multimodal_tool = Tool(
539
  name="gemini_multimodal_tool",
540
  description=analyze_image_with_gemini.__doc__, # Use the docstring as description
541
  func=analyze_image_with_gemini,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
542
  )
 
1
  from langchain_experimental.utilities import PythonREPL
2
  from langchain.tools import Tool
3
  from langchain_community.tools import TavilySearchResults
4
+ from langchain_community.document_loaders import WikipediaLoader
5
+
6
  import re
7
 
8
  # For newer LangChain versions, sometimes it's directly from langchain.tools.python
 
236
  from langchain.tools import Tool # Ensure Tool is imported
237
 
238
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
239
  import os
240
  from serpapi import GoogleSearch # Or use SerpApiClient for other engines
241
  from typing import Dict, Any
 
447
  name="gemini_multimodal_tool",
448
  description=analyze_image_with_gemini.__doc__, # Use the docstring as description
449
  func=analyze_image_with_gemini,
450
+ )
451
+
452
+
453
def wiki_search(query: str) -> str:
    """Search Wikipedia for a query and return at most 2 results.

    Args:
        query: The search query, e.g. a page title or a short topic phrase.

    Returns:
        The matching articles as ``<Document source="..." page="...">``
        blocks separated by ``---`` lines. If the query is blank, nothing
        is found, or the lookup fails, a short explanatory message is
        returned instead.
    """
    # NOTE: this docstring doubles as the tool description handed to the
    # agent (``description=wiki_search.__doc__``), so keep it model-readable.

    # Skip the network round-trip entirely when there is nothing to search.
    if not query or not query.strip():
        return "Wikipedia tool error: the query is empty."

    try:
        search_docs = WikipediaLoader(query=query, load_max_docs=2).load()
    except Exception as exc:
        # Tool boundary: report the failure as text so the agent can react
        # (retry, rephrase, pick another tool) instead of the run aborting.
        return f"Wikipedia tool error: {exc}"

    if not search_docs:
        # An empty string would give the agent no hint about what happened.
        return "No Wikipedia article found for your query."

    # The opening tag must NOT be self-closing ("/>"): each document body is
    # terminated by an explicit </Document> closing tag.
    return "\n\n---\n\n".join(
        f'<Document source="{doc.metadata.get("source", "")}" '
        f'page="{doc.metadata.get("page", "")}">\n'
        f"{doc.page_content}\n</Document>"
        for doc in search_docs
    )
468
+
469
+ wikipedia_search_tool2 = Tool(
470
+ name="wikipedia_search_tool2",
471
+ description=wiki_search.__doc__,
472
+ func=wiki_search,
473
  )