Final_Assignment_Template

Sleeping

App Files Files Community

hanshan1988 committed on Feb 22

Commit

ee950fa

1 Parent(s): f90fb02

improved wikipedia tool

Browse files

Files changed (2) hide show

app.py +6 -6
tools.py +33 -6

app.py CHANGED Viewed

@@ -24,7 +24,7 @@ from langgraph.graph.message import add_messages
 from langchain_huggingface import ChatHuggingFace, HuggingFaceEndpoint
 from langgraph.prebuilt import ToolNode, tools_condition
-from tools import fetch_website, ask_wiki, youtube_transcript, python_repl_tool
 # Initialize the Hugging Face model
 hf_model_name = "openai/gpt-oss-120b" # "Qwen/Qwen2.5-72B-Instruct"
@@ -43,7 +43,7 @@ chat_model = ChatHuggingFace(llm=llm)
 # Equip llm with tools
 tools_list = [
     fetch_website,
-    ask_wiki,
     youtube_transcript,
     python_repl_tool
 ]
@@ -70,12 +70,12 @@ def assistant(state: AgentState):
             Returns:
                 The title and content of the website.
-        ask_wiki(query: str) -> str:
-            Retreive information from Wikipedia based on a user query.
             Args:
-                query: A user query.
             Returns:
-                A single string containing the retrieved article from Wikipedia.
         youtube_transcript(url: str) -> str:
             Fetch the transcript of a youtube video.

 from langchain_huggingface import ChatHuggingFace, HuggingFaceEndpoint
 from langgraph.prebuilt import ToolNode, tools_condition
+from tools import fetch_website, get_wiki_full, youtube_transcript, python_repl_tool
 # Initialize the Hugging Face model
 hf_model_name = "openai/gpt-oss-120b" # "Qwen/Qwen2.5-72B-Instruct"
 # Equip llm with tools
 tools_list = [
     fetch_website,
+    get_wiki_full,
     youtube_transcript,
     python_repl_tool
 ]
             Returns:
                 The title and content of the website.
+        get_wiki_full(query: str) -> str:
+            Scrape the content of a Wikipedia page based on the user query.
             Args:
+                query: The user query to search for on Wikipedia.
             Returns:
+                A single string containing the content of the Wikipedia page.
         youtube_transcript(url: str) -> str:
             Fetch the transcript of a youtube video.

tools.py CHANGED Viewed

@@ -1,10 +1,14 @@
 import time
 from langchain.tools import tool
 from langchain_community.utilities import WikipediaAPIWrapper
 from langchain_community.tools import WikipediaQueryRun
 from langchain_community.document_loaders import YoutubeLoader, WebBaseLoader
 from langchain_experimental.utilities import PythonREPL
 # Initialize Python REPL
 python_repl = PythonREPL()
@@ -20,23 +24,46 @@ def fetch_website(url:str) -> str:
     docs = loader.load()
     return docs[0].page_content
-@tool
-def ask_wiki(query: str) -> str:
-    """Retrieve information from Wikipedia based on a user query.
     Args:
         query: A user query.
     Returns:
-        A single string containing the retrieved article from Wikipedia.
     """
     if not query.strip():
         return "Please provide a valid query."
     try:
-        wiki_toolapi_wrapper = WikipediaAPIWrapper(top_k_results=1, doc_content_chars_max=8000)
         wiki_tool = WikipediaQueryRun(api_wrapper=wiki_toolapi_wrapper)
         result = wiki_tool.run(query)
-        return result
     except Exception as e:
         return f"Error retrieving information: {str(e)}"
 @tool
 def youtube_transcript(url: str) -> str:

 import time
+import requests
+from bs4 import BeautifulSoup
 from langchain.tools import tool
 from langchain_community.utilities import WikipediaAPIWrapper
 from langchain_community.tools import WikipediaQueryRun
 from langchain_community.document_loaders import YoutubeLoader, WebBaseLoader
 from langchain_experimental.utilities import PythonREPL
 # Initialize Python REPL
 python_repl = PythonREPL()
     docs = loader.load()
     return docs[0].page_content
def get_wiki_title(query: str) -> str:
    """Retrieve the title of the Wikipedia page that best matches a user query.

    Args:
        query: A user query.
    Returns:
        A single string containing the retrieved article page title from
        Wikipedia. Failures are reported as strings rather than raised:
        "Please provide a valid query." for a missing or blank query, and
        "Error retrieving information: <details>" if the lookup raises.
    """
    # Validate before touching the string so that a missing (None) or
    # non-string argument yields the same message as a blank query instead
    # of an AttributeError from ``.strip()``.
    if not isinstance(query, str) or not query.strip():
        return "Please provide a valid query."
    try:
        # Reduce length of retrieved content as we just need the title
        api_wrapper = WikipediaAPIWrapper(top_k_results=1, doc_content_chars_max=1000)
        wiki_tool = WikipediaQueryRun(api_wrapper=api_wrapper)
        result = wiki_tool.run(query)
        # Extract the title from the result (assuming it's in the format
        # "Page: <title>\nSummary: <summary>").
        first_line = result.split("\n", 1)[0]
        label = "Page: "
        # Strip only the leading label; a blanket replace() would also delete
        # any "Page: " that happens to occur inside the title itself.
        if first_line.startswith(label):
            return first_line[len(label):]
        return first_line
    except Exception as e:
        return f"Error retrieving information: {str(e)}"
@tool
def get_wiki_full(query: str) -> str:
    """Scrape the content of a Wikipedia page based on the user query.

    The query is first resolved to an article title with ``get_wiki_title``;
    the matching English Wikipedia page is then downloaded and reduced to the
    plain text of its main content container.

    Args:
        query: The user query to search for on Wikipedia.
    Returns:
        A single string containing the content of the Wikipedia page,
        truncated to 32,000 characters. If the title lookup or the download
        fails, a human-readable error message is returned instead.
    """
    # Imported here so this function does not rely on a new module-level import.
    from urllib.parse import quote

    title = get_wiki_title(query)
    # get_wiki_title reports failures as plain strings; pass them through
    # rather than turning an error message into a Wikipedia URL.
    if title == "Please provide a valid query." or title.startswith(
        "Error retrieving information:"
    ):
        return title

    # Percent-encode the title so characters such as "?", "#" or "%" cannot
    # be misread as URL syntax ("/" is left as-is by quote's default).
    page_name = quote(title.replace(" ", "_"))
    url = f"https://en.wikipedia.org/wiki/{page_name}"
    headers = {'User-Agent': 'Mozilla/5.0'}
    try:
        # A timeout keeps the agent from hanging on a stalled connection;
        # raise_for_status turns 404/5xx pages into an error instead of text.
        response = requests.get(url, headers=headers, timeout=30)
        response.raise_for_status()
    except requests.RequestException as e:
        return f"Error retrieving information: {str(e)}"

    soup = BeautifulSoup(response.content, 'html.parser')
    # Get all content from main article
    content = soup.find('div', {'id': 'mw-content-text'})
    if content is None:
        # find() returns None when the container is absent from the page.
        return f"Error retrieving information: no article content found at {url}"
    # Limit to 32,000 characters (about 8k tokens, assuming ~4 characters per
    # token) to avoid excessive length.
    return content.get_text()[:32_000]
 @tool
 def youtube_transcript(url: str) -> str: