hanshan1988 committed on
Commit
ee950fa
·
1 Parent(s): f90fb02

improved wikipedia tool

Browse files
Files changed (2) hide show
  1. app.py +6 -6
  2. tools.py +33 -6
app.py CHANGED
@@ -24,7 +24,7 @@ from langgraph.graph.message import add_messages
24
  from langchain_huggingface import ChatHuggingFace, HuggingFaceEndpoint
25
  from langgraph.prebuilt import ToolNode, tools_condition
26
 
27
- from tools import fetch_website, ask_wiki, youtube_transcript, python_repl_tool
28
 
29
  # Initialize the Hugging Face model
30
  hf_model_name = "openai/gpt-oss-120b" # "Qwen/Qwen2.5-72B-Instruct"
@@ -43,7 +43,7 @@ chat_model = ChatHuggingFace(llm=llm)
43
  # Equip llm with tools
44
  tools_list = [
45
  fetch_website,
46
- ask_wiki,
47
  youtube_transcript,
48
  python_repl_tool
49
  ]
@@ -70,12 +70,12 @@ def assistant(state: AgentState):
70
  Returns:
71
  The title and content of the website.
72
 
73
- ask_wiki(query: str) -> str:
74
- Retreive information from Wikipedia based on a user query.
75
  Args:
76
- query: A user query.
77
  Returns:
78
- A single string containing the retrieved article from Wikipedia.
79
 
80
  youtube_transcript(url: str) -> str:
81
  Fetch the transcript of a youtube video.
 
24
  from langchain_huggingface import ChatHuggingFace, HuggingFaceEndpoint
25
  from langgraph.prebuilt import ToolNode, tools_condition
26
 
27
+ from tools import fetch_website, get_wiki_full, youtube_transcript, python_repl_tool
28
 
29
  # Initialize the Hugging Face model
30
  hf_model_name = "openai/gpt-oss-120b" # "Qwen/Qwen2.5-72B-Instruct"
 
43
  # Equip llm with tools
44
  tools_list = [
45
  fetch_website,
46
+ get_wiki_full,
47
  youtube_transcript,
48
  python_repl_tool
49
  ]
 
70
  Returns:
71
  The title and content of the website.
72
 
73
+ get_wiki_full(query: str) -> str:
74
+ Scrape the content of a Wikipedia page based on the user query.
75
  Args:
76
+ query: The user query to search for on Wikipedia.
77
  Returns:
78
+ A single string containing the content of the Wikipedia page.
79
 
80
  youtube_transcript(url: str) -> str:
81
  Fetch the transcript of a youtube video.
tools.py CHANGED
@@ -1,10 +1,14 @@
1
  import time
 
 
 
2
  from langchain.tools import tool
3
  from langchain_community.utilities import WikipediaAPIWrapper
4
  from langchain_community.tools import WikipediaQueryRun
5
  from langchain_community.document_loaders import YoutubeLoader, WebBaseLoader
6
  from langchain_experimental.utilities import PythonREPL
7
 
 
8
  # Initialize Python REPL
9
  python_repl = PythonREPL()
10
 
@@ -20,23 +24,46 @@ def fetch_website(url:str) -> str:
20
  docs = loader.load()
21
  return docs[0].page_content
22
 
23
- @tool
24
- def ask_wiki(query: str) -> str:
25
- """Retrieve information from Wikipedia based on a user query.
26
  Args:
27
  query: A user query.
28
  Returns:
29
- A single string containing the retrieved article from Wikipedia.
30
  """
31
  if not query.strip():
32
  return "Please provide a valid query."
33
  try:
34
- wiki_toolapi_wrapper = WikipediaAPIWrapper(top_k_results=1, doc_content_chars_max=8000)
 
35
  wiki_tool = WikipediaQueryRun(api_wrapper=wiki_toolapi_wrapper)
36
  result = wiki_tool.run(query)
37
- return result
 
 
38
  except Exception as e:
39
  return f"Error retrieving information: {str(e)}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
40
 
41
  @tool
42
  def youtube_transcript(url: str) -> str:
 
1
  import time
2
+ import requests
3
+ from bs4 import BeautifulSoup
4
+
5
  from langchain.tools import tool
6
  from langchain_community.utilities import WikipediaAPIWrapper
7
  from langchain_community.tools import WikipediaQueryRun
8
  from langchain_community.document_loaders import YoutubeLoader, WebBaseLoader
9
  from langchain_experimental.utilities import PythonREPL
10
 
11
+
12
  # Initialize Python REPL
13
  python_repl = PythonREPL()
14
 
 
24
  docs = loader.load()
25
  return docs[0].page_content
26
 
27
def get_wiki_title(query: str) -> str:
    """Retrieve Wikipedia page title based on a user query.

    Args:
        query: A user query.
    Returns:
        A single string containing the retrieved article page title from
        Wikipedia. This function does not raise: a blank or non-string query
        yields "Please provide a valid query.", and any exception raised
        during the lookup yields "Error retrieving information: <details>".
    """
    # Reject None / non-string input as well as blank strings, so that the
    # validation itself can never raise.
    if not isinstance(query, str) or not query.strip():
        return "Please provide a valid query."
    try:
        # Reduce length of retrieved content as we just need the title
        api_wrapper = WikipediaAPIWrapper(top_k_results=1, doc_content_chars_max=1000)
        wiki_tool = WikipediaQueryRun(api_wrapper=api_wrapper)
        result = wiki_tool.run(query)

        # Extract the title from the result (assuming it's in the format
        # "Page: <title>\nSummary: <summary>"). If the first line does not
        # start with the marker, it is returned unchanged.
        first_line = result.strip().split("\n", 1)[0]
        marker = "Page: "
        # Strip the marker only as a leading prefix, so a title that happens
        # to contain "Page: " further in is left intact.
        if first_line.startswith(marker):
            first_line = first_line[len(marker):]
        return first_line.strip()
    except Exception as e:
        # Deliberate best-effort boundary: callers expect a string back.
        return f"Error retrieving information: {str(e)}"
46
+
47
@tool
def get_wiki_full(query: str) -> str:
    """Scrape the content of a Wikipedia page based on the user query.

    Args:
        query: The user query to search for on Wikipedia.
    Returns:
        A single string containing the content of the Wikipedia page
        (truncated to 32,000 characters), or a short error message when the
        title lookup or the page download fails.
    """
    from urllib.parse import quote

    title = get_wiki_title(query)
    # get_wiki_title reports failures as plain strings; pass them through
    # instead of turning them into a bogus article URL.
    if title == "Please provide a valid query." or title.startswith(
        "Error retrieving information:"
    ):
        return title

    # Percent-encode the title so characters such as "?", "#" or "%" stay part
    # of the path instead of being parsed as a query string or fragment.
    page_name = quote(title.replace(" ", "_"))
    url = f"https://en.wikipedia.org/wiki/{page_name}"
    headers = {'User-Agent': 'Mozilla/5.0'}

    try:
        # A timeout keeps the agent from hanging on a stalled connection.
        response = requests.get(url, headers=headers, timeout=15)
        # Surface 404s and other HTTP errors rather than scraping an error page.
        response.raise_for_status()
    except requests.RequestException as e:
        return f"Error retrieving information: {str(e)}"

    soup = BeautifulSoup(response.content, 'html.parser')

    # Get all content from main article
    content = soup.find('div', {'id': 'mw-content-text'})
    if content is None:
        # find() returns None when the expected container is missing.
        return f"Error retrieving information: no article content found at {url}"

    # Limit to 32,000 characters (roughly 8k tokens) to avoid excessive length
    return content.get_text()[:32_000]
67
 
68
  @tool
69
  def youtube_transcript(url: str) -> str: