BetsyFromR committed on
Commit
db6830a
·
verified ·
1 Parent(s): e80b45c

Update tools/visit_webpage.py

Browse files
Files changed (1) hide show
  1. tools/visit_webpage.py +10 -1
tools/visit_webpage.py CHANGED
@@ -23,8 +23,17 @@ class VisitWebpageTool(Tool):
23
  "You must install packages `markdownify` and `requests` to run this tool: for instance run `pip install markdownify requests`."
24
  ) from e
25
  try:
 
 
 
 
 
 
 
 
26
  # Send a GET request to the URL with a 20-second timeout
27
- response = requests.get(url, timeout=20)
 
28
  response.raise_for_status() # Raise an exception for bad status codes
29
 
30
  # Convert the HTML content to Markdown
 
23
  "You must install packages `markdownify` and `requests` to run this tool: for instance run `pip install markdownify requests`."
24
  ) from e
25
  try:
26
+ # Wikipedia and similar sites can block requests without a User-Agent.
27
+ headers = {
28
+ "User-Agent": (
29
+ "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) "
30
+ "AppleWebKit/537.36 (KHTML, like Gecko) "
31
+ "Chrome/122.0.0.0 Safari/537.36"
32
+ )
33
+ }
34
  # Send a GET request to the URL with a 20-second timeout
35
+ response = requests.get(url, headers=headers, timeout=20)
36
+ # response = requests.get(url, timeout=20)
37
  response.raise_for_status() # Raise an exception for bad status codes
38
 
39
  # Convert the HTML content to Markdown