Commit ·
ee950fa
1
Parent(s): f90fb02
improved wikipedia tool
Browse files
app.py
CHANGED
|
@@ -24,7 +24,7 @@ from langgraph.graph.message import add_messages
|
|
| 24 |
from langchain_huggingface import ChatHuggingFace, HuggingFaceEndpoint
|
| 25 |
from langgraph.prebuilt import ToolNode, tools_condition
|
| 26 |
|
| 27 |
-
from tools import fetch_website,
|
| 28 |
|
| 29 |
# Initialize the Hugging Face model
|
| 30 |
hf_model_name = "openai/gpt-oss-120b" # "Qwen/Qwen2.5-72B-Instruct"
|
|
@@ -43,7 +43,7 @@ chat_model = ChatHuggingFace(llm=llm)
|
|
| 43 |
# Equip llm with tools
|
| 44 |
tools_list = [
|
| 45 |
fetch_website,
|
| 46 |
-
|
| 47 |
youtube_transcript,
|
| 48 |
python_repl_tool
|
| 49 |
]
|
|
@@ -70,12 +70,12 @@ def assistant(state: AgentState):
|
|
| 70 |
Returns:
|
| 71 |
The title and content of the website.
|
| 72 |
|
| 73 |
-
|
| 74 |
-
|
| 75 |
Args:
|
| 76 |
-
query:
|
| 77 |
Returns:
|
| 78 |
-
A single string containing the
|
| 79 |
|
| 80 |
youtube_transcript(url: str) -> str:
|
| 81 |
Fetch the transcript of a youtube video.
|
|
|
|
| 24 |
from langchain_huggingface import ChatHuggingFace, HuggingFaceEndpoint
|
| 25 |
from langgraph.prebuilt import ToolNode, tools_condition
|
| 26 |
|
| 27 |
+
from tools import fetch_website, get_wiki_full, youtube_transcript, python_repl_tool
|
| 28 |
|
| 29 |
# Initialize the Hugging Face model
|
| 30 |
hf_model_name = "openai/gpt-oss-120b" # "Qwen/Qwen2.5-72B-Instruct"
|
|
|
|
| 43 |
# Tools the LLM is equipped with
tools_list = [
    fetch_website,
    get_wiki_full,
    youtube_transcript,
    python_repl_tool,
]
|
|
|
|
| 70 |
Returns:
|
| 71 |
The title and content of the website.
|
| 72 |
|
| 73 |
+
get_wiki_full(query: str) -> str:
|
| 74 |
+
Scrape the content of a Wikipedia page based on the user query.
|
| 75 |
Args:
|
| 76 |
+
query: The user query to search for on Wikipedia.
|
| 77 |
Returns:
|
| 78 |
+
A single string containing the content of the Wikipedia page.
|
| 79 |
|
| 80 |
youtube_transcript(url: str) -> str:
|
| 81 |
Fetch the transcript of a youtube video.
|
tools.py
CHANGED
|
@@ -1,10 +1,14 @@
|
|
| 1 |
import time
|
|
|
|
|
|
|
|
|
|
| 2 |
from langchain.tools import tool
|
| 3 |
from langchain_community.utilities import WikipediaAPIWrapper
|
| 4 |
from langchain_community.tools import WikipediaQueryRun
|
| 5 |
from langchain_community.document_loaders import YoutubeLoader, WebBaseLoader
|
| 6 |
from langchain_experimental.utilities import PythonREPL
|
| 7 |
|
|
|
|
| 8 |
# Initialize Python REPL
|
| 9 |
python_repl = PythonREPL()
|
| 10 |
|
|
@@ -20,23 +24,46 @@ def fetch_website(url:str) -> str:
|
|
| 20 |
docs = loader.load()
|
| 21 |
return docs[0].page_content
|
| 22 |
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
"""Retrieve information from Wikipedia based on a user query.
|
| 26 |
Args:
|
| 27 |
query: A user query.
|
| 28 |
Returns:
|
| 29 |
-
A single string containing the retrieved article from Wikipedia.
|
| 30 |
"""
|
| 31 |
if not query.strip():
|
| 32 |
return "Please provide a valid query."
|
| 33 |
try:
|
| 34 |
-
|
|
|
|
| 35 |
wiki_tool = WikipediaQueryRun(api_wrapper=wiki_toolapi_wrapper)
|
| 36 |
result = wiki_tool.run(query)
|
| 37 |
-
|
|
|
|
|
|
|
| 38 |
except Exception as e:
|
| 39 |
return f"Error retrieving information: {str(e)}"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 40 |
|
| 41 |
@tool
|
| 42 |
def youtube_transcript(url: str) -> str:
|
|
|
|
| 1 |
import time
|
| 2 |
+
import requests
|
| 3 |
+
from bs4 import BeautifulSoup
|
| 4 |
+
|
| 5 |
from langchain.tools import tool
|
| 6 |
from langchain_community.utilities import WikipediaAPIWrapper
|
| 7 |
from langchain_community.tools import WikipediaQueryRun
|
| 8 |
from langchain_community.document_loaders import YoutubeLoader, WebBaseLoader
|
| 9 |
from langchain_experimental.utilities import PythonREPL
|
| 10 |
|
| 11 |
+
|
| 12 |
# Initialize Python REPL
|
| 13 |
python_repl = PythonREPL()
|
| 14 |
|
|
|
|
| 24 |
docs = loader.load()
|
| 25 |
return docs[0].page_content
|
| 26 |
|
| 27 |
+
def get_wiki_title(query: str) -> str:
    """Retrieve Wikipedia page title based on a user query.

    Args:
        query: A user query.
    Returns:
        A single string containing the retrieved article page title from
        Wikipedia. Failures are reported as strings too, never raised:
        "Please provide a valid query." for an empty or blank query, and a
        message starting with "Error retrieving information: " when the
        lookup raises or does not yield a page.
    """
    # Guard against None as well as empty/whitespace-only input.
    if not query or not query.strip():
        return "Please provide a valid query."
    try:
        # Reduce length of retrieved content as we just need the title
        wiki_toolapi_wrapper = WikipediaAPIWrapper(top_k_results=1, doc_content_chars_max=1000)
        wiki_tool = WikipediaQueryRun(api_wrapper=wiki_toolapi_wrapper)
        result = wiki_tool.run(query)
    except Exception as e:
        # Best-effort tool helper: surface the failure as text.
        return f"Error retrieving information: {str(e)}"

    # The result is assumed to look like "Page: <title>\nSummary: <summary>".
    first_line = result.split("\n", 1)[0]
    prefix = "Page: "
    if not first_line.startswith(prefix):
        # Anything else (e.g. a "no result" message) is not a page title;
        # report it as an error instead of passing it off as one.
        return f"Error retrieving information: {first_line}"
    # Strip only the leading marker so a title that itself contains
    # "Page: " is left intact.
    return first_line[len(prefix):]
|
| 46 |
+
|
| 47 |
+
@tool
def get_wiki_full(query: str) -> str:
    """Scrape the content of a Wikipedia page based on the user query.

    Args:
        query: The user query to search for on Wikipedia.
    Returns:
        A single string containing the content of the Wikipedia page.
    """
    # NOTE: the docstring above is what `@tool` exposes as the tool
    # description, so its wording is intentionally left unchanged.
    from urllib.parse import quote  # local import: only this tool needs it

    title = get_wiki_title(query)
    # get_wiki_title reports failures as plain strings instead of raising.
    # Pass those through rather than building a URL out of an error message.
    if not title:
        return "Error retrieving information: no Wikipedia page title found."
    if (
        title == "Please provide a valid query."
        or title.startswith("Error retrieving information:")
        or title.startswith("No good Wikipedia Search Result")
    ):
        return title

    # Percent-encode the title so characters such as '?' or '#' stay part of
    # the page path instead of starting a query string or fragment.
    url = f'https://en.wikipedia.org/wiki/{quote(title.replace(" ", "_"))}'
    headers = {'User-Agent': 'Mozilla/5.0'}

    try:
        # A timeout keeps a stalled connection from hanging the agent.
        response = requests.get(url, headers=headers, timeout=15)
        response.raise_for_status()
    except requests.RequestException as e:
        return f"Error retrieving information: {str(e)}"

    soup = BeautifulSoup(response.content, 'html.parser')

    # Get all content from main article
    content = soup.find('div', {'id': 'mw-content-text'})
    if content is None:
        # The page did not contain the expected article container.
        return f"Error retrieving information: no article content found at {url}"

    # Cap at 32,000 characters (about 8k tokens, assuming ~4 characters per
    # token) to avoid excessive length.
    return content.get_text()[:32_000]
|
| 67 |
|
| 68 |
@tool
|
| 69 |
def youtube_transcript(url: str) -> str:
|