Spaces:

DeepImagix
/

self-trained2

Running

App Files Files Community

self-trained2 / agent /tools /web_tools.py

DeepImagix

Upload web_tools.py

d656581 verified 14 days ago

Raw

History Blame Contribute Delete

28.3 kB

	"""
	NeuraPrompt Agent — Web Tools v8.2 (Reliability + Per-Site Handlers)
	====================================================================

	Fixes over v8.1:
	* _safe_request now accepts any 2xx status (DDG returns 202 with valid
	content — previously discarded, breaking both DDG engines).
	* DDG Lite engine replaced with DDG HTML endpoint (html.duckduckgo.com/html/),
	which returns 200 with parseable result__a / result__snippet selectors.
	DDG Lite's HTML structure had changed and the old selectors found nothing.
	* BeautifulSoup parser falls back to html.parser if lxml is missing — works
	regardless of environment (per spec).
	* Per-site User-Agent strategy: Wikipedia etc. block generic browser UAs
	from datacenter IPs (403). Bot UA with contact info returns 200.
	* New SUPPORTED_SITES registry with per-site fetch handlers:
	- Wikipedia → REST API (reliable, no scraping)
	- GitHub → raw.githubusercontent.com (no JS, no auth needed)
	* fetch_url now has a 4-step fallback chain: site handler → bot UA →
	browser UA → web.archive.org cache. Previously gave up after one attempt.

	Public API unchanged:
	web_search(query) -> str
	fetch_url(url) -> str
	"""

	import requests
	import random
	import time
	import re
	from urllib.parse import quote_plus, unquote, urlparse, parse_qs
	from bs4 import BeautifulSoup
	from requests.adapters import HTTPAdapter
	from urllib3.util.retry import Retry
	import logging

	# Module-level logger shared by every helper in this file.
	log = logging.getLogger("agent.tools.web.v8.2")

	# ─────────────────────────────────────────────────────────────
	# CONFIG
	# ─────────────────────────────────────────────────────────────

	TIMEOUT_SEARCH = 8  # base `timeout` (seconds) for search-engine requests; some engines add a few seconds
	TIMEOUT_FETCH = 15  # `timeout` (seconds) for page fetches and the archive.org lookup
	MAX_RETRIES = 1  # passed as `total` to urllib3's Retry in _build_session
	BACKOFF_FACTOR = 0.3  # passed as `backoff_factor` to urllib3's Retry in _build_session
	MAX_RESULTS = 6  # cap on the number of results a single search engine returns

	# Browser User-Agent strings; one is picked at random for hosts that are not
	# listed in BOT_UA_SITES.
	USER_AGENTS = [
	"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36",
	"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36",
	"Mozilla/5.0 (X11; Linux x86_64; rv:126.0) Gecko/20100101 Firefox/126.0",
	"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Edge/125.0.0.0",
	]

	# Bot UA — used for sites that block generic browser UAs from datacenter IPs.
	# Format follows Wikipedia's policy: <name>/<version> (<contact>)
	BOT_USER_AGENT = "NeuraPromptAgent/1.0 (https://neuraprompt.ai; contact@neuraprompt.ai)"

	# Sites that block generic browser UAs → must use bot UA.
	# Matched against the URL's netloc (case-insensitive substring).
	# Because the match is a substring test, "meta.wikimedia.org" is already
	# covered by "wikimedia.org"; the entry is redundant but harmless.
	BOT_UA_SITES = (
	"wikipedia.org",
	"wikimedia.org",
	"wiktionary.org",
	"wikiquote.org",
	"wikibooks.org",
	"wikisource.org",
	"wikinews.org",
	"wikiversity.org",
	"mediawiki.org",
	"meta.wikimedia.org",
	"gov.uk",
	"nature.com",
	"sciencedirect.com",
	"springer.com",
	)

	# Base URLs of public SearXNG instances. _search_searxng shuffles a copy and
	# queries at most three of them per search.
	SEARXNG_INSTANCES = [
	"https://searx.be",
	"https://search.ononoki.org",
	"https://searx.tiekoetter.com",
	"https://searx.prvcy.eu",
	"https://search.sapti.me",
	"https://darmarit.org/searx",
	"https://searxng.site",
	]

	# ─────────────────────────────────────────────────────────────
	# SUPPORTED SITES (per-site fetch handlers — "supported web URLs")
	# ─────────────────────────────────────────────────────────────
	# Each handler: (url, session) -> str (page text) or None to fall through
	# Add new sites here as needed. Handlers are tried BEFORE the generic fetcher.

	def _fetch_wikipedia(url: str, session: requests.Session) -> str | None:
	    """Fetch a Wikipedia article through the REST API instead of scraping.

	    Maps ``<lang>.wikipedia.org/wiki/<Title>`` to
	    ``https://<lang>.wikipedia.org/api/rest_v1/page/html/<Title>``.

	    Args:
	        url: Article URL as supplied by the caller.
	        session: Session used for the HTTP request.

	    Returns:
	        A "Title / URL / Source" header followed by at most 12000 characters
	        of page text. None when the URL is not a ``/wiki/`` article on a
	        Wikipedia language edition, when the request fails, or when fewer
	        than 100 characters of text come back; the caller then falls
	        through to the generic fetcher.
	    """
	    parsed = urlparse(url)
	    # .hostname is lower-cased and excludes port/userinfo. A suffix test (not
	    # a substring test) keeps look-alike hosts such as
	    # "wikipedia.org.example.com" out, and also rejects the bare
	    # "wikipedia.org" host, which has no language label to build on.
	    host = (parsed.hostname or "").lower()
	    if not host.endswith(".wikipedia.org"):
	        return None

	    match = re.match(r"^/wiki/(.+)$", parsed.path)
	    if not match:
	        return None
	    title = unquote(match.group(1))

	    # The first host label is the language edition ("en", "de", ...).
	    # "www" is not an edition, so default to English for it.
	    lang = host.split(".")[0]
	    if lang == "www":
	        lang = "en"

	    # Article titles use "_" for spaces. Normalise first: quote_plus would
	    # otherwise emit "+", which is a literal plus sign inside a URL path.
	    # quote_plus still percent-encodes "/" so sub-page titles stay one segment.
	    api_title = quote_plus(title.replace(" ", "_"))
	    api_url = f"https://{lang}.wikipedia.org/api/rest_v1/page/html/{api_title}"

	    resp = _safe_request(
	        session, api_url,
	        headers={"User-Agent": BOT_USER_AGENT, "Accept": "text/html"},
	        timeout=TIMEOUT_FETCH,
	        allow_redirects=True,
	    )
	    if not resp:
	        return None

	    soup = _make_soup(resp.text)
	    # Drop non-content elements before extracting text.
	    for tag in soup(["script", "style", "nav", "header", "footer", "aside"]):
	        tag.decompose()
	    text = soup.get_text(" ", strip=True)
	    if len(text) < 100:
	        return None

	    title_tag = soup.find("title")
	    page_title = title_tag.get_text(strip=True) if title_tag else title
	    return f"Title: {page_title}\nURL: {url}\nSource: Wikipedia REST API\n{'='*60}\n\n{text[:12000]}"


	def _fetch_github(url: str, session: requests.Session) -> str | None:
	    """Fetch GitHub content, preferring the plain-text raw endpoint.

	    ``github.com/{user}/{repo}/blob/{ref}/{path}`` URLs are rewritten to
	    ``raw.githubusercontent.com/{user}/{repo}/{ref}/{path}``. Other GitHub
	    URLs and raw.githubusercontent.com URLs are requested unchanged.

	    Args:
	        url: URL as supplied by the caller.
	        session: Session used for the HTTP request.

	    Returns:
	        A short header plus at most 12000 characters of content. None when
	        the host is not GitHub, the request fails, or the response is too
	        short (under 10 characters raw, or under 100 characters of text
	        after HTML stripping).
	    """
	    parsed = urlparse(url)
	    # Compare on the parsed hostname so look-alike hosts ("notgithub.com")
	    # are rejected while real subdomains are still accepted.
	    host = (parsed.hostname or "").lower()
	    on_github = host == "github.com" or host.endswith(".github.com")
	    if not on_github and host != "raw.githubusercontent.com":
	        return None

	    raw_url = url
	    if host in ("github.com", "www.github.com"):
	        # Match on the path only: this accepts "www." and any host casing,
	        # and drops the blob page's query/fragment (e.g. "?plain=1", "#L10"),
	        # which has no meaning on the raw endpoint.
	        blob = re.match(r"^/([^/]+)/([^/]+)/blob/(.+)$", parsed.path)
	        if blob:
	            owner, repo, ref_and_path = blob.groups()
	            raw_url = f"https://raw.githubusercontent.com/{owner}/{repo}/{ref_and_path}"

	    resp = _safe_request(
	        session, raw_url,
	        headers={"User-Agent": BOT_USER_AGENT},
	        timeout=TIMEOUT_FETCH,
	        allow_redirects=True,
	    )
	    if not resp:
	        return None

	    content_type = resp.headers.get("content-type", "").lower()
	    text = resp.text
	    if len(text) < 10:
	        return None

	    # For raw files, the response IS the file content (no HTML wrapper).
	    if "text/html" not in content_type:
	        return f"URL: {raw_url}\nContent-Type: {content_type}\n{'='*60}\n\n{text[:12000]}"

	    # HTML response (e.g. a repository page): strip markup and page chrome.
	    soup = _make_soup(text)
	    for tag in soup(["script", "style", "nav", "header", "footer", "aside"]):
	        tag.decompose()
	    body = soup.get_text(" ", strip=True)
	    if len(body) < 100:
	        return None
	    return f"URL: {url}\n{'='*60}\n\n{body[:12000]}"


	# Registry: netloc substring → handler. Tried in order; first match wins.
	# fetch_url compares each pattern against the lower-cased netloc and stops
	# at the first hit, whether or not that handler produced a result.
	# NOTE(review): _fetch_wikipedia returns None unless "wikipedia.org" appears
	# in the netloc, so the "wikimedia.org" entry always falls through to the
	# generic fetcher — confirm whether a dedicated handler was intended.
	SUPPORTED_SITES = [
	("wikipedia.org", _fetch_wikipedia),
	("wikimedia.org", _fetch_wikipedia),
	("github.com", _fetch_github),
	("raw.githubusercontent.com", _fetch_github),
	]


	# ─────────────────────────────────────────────────────────────
	# SESSION BUILDER
	# ─────────────────────────────────────────────────────────────

	def _build_session() -> requests.Session:
	    """Return a new Session whose HTTP and HTTPS adapters retry transient failures.

	    Retries apply to HEAD/GET/OPTIONS requests that come back with
	    429/500/502/503/504; the retry count and backoff come from MAX_RETRIES
	    and BACKOFF_FACTOR. With ``raise_on_status=False`` the last response is
	    returned once retries are exhausted instead of raising.
	    """
	    retries = Retry(
	        total=MAX_RETRIES,
	        backoff_factor=BACKOFF_FACTOR,
	        status_forcelist=[429, 500, 502, 503, 504],
	        allowed_methods=["HEAD", "GET", "OPTIONS"],
	        raise_on_status=False,
	    )
	    retrying_adapter = HTTPAdapter(max_retries=retries, pool_connections=10, pool_maxsize=10)

	    http = requests.Session()
	    # One adapter instance serves both schemes.
	    for scheme in ("https://", "http://"):
	        http.mount(scheme, retrying_adapter)
	    return http


	# ─────────────────────────────────────────────────────────────
	# PARSER HELPER (lxml with html.parser fallback — portability)
	# ─────────────────────────────────────────────────────────────

	def _make_soup(html: str) -> BeautifulSoup:
	    """Build a BeautifulSoup tree, preferring lxml and falling back to html.parser.

	    Any exception from the lxml attempt (parser not installed, or a failure
	    while parsing) triggers a second attempt with the standard-library
	    parser, so the function does not depend on lxml being present.
	    """
	    try:
	        soup = BeautifulSoup(html, "lxml")
	    except Exception:
	        # html.parser ships with Python, so this attempt needs no extra package.
	        soup = BeautifulSoup(html, "html.parser")
	    return soup


	# ─────────────────────────────────────────────────────────────
	# HEADERS (per-site UA strategy — fixes Wikipedia 403)
	# ─────────────────────────────────────────────────────────────

	def _ua_for_url(url: str) -> str:
	    """Choose the User-Agent string to send to *url*.

	    Hosts whose netloc contains any BOT_UA_SITES entry receive
	    BOT_USER_AGENT; every other host (and any URL that cannot be parsed)
	    receives a randomly chosen browser string from USER_AGENTS.
	    """
	    try:
	        host = urlparse(url).netloc.lower()
	    except Exception:
	        return random.choice(USER_AGENTS)

	    # Substring match, as documented next to BOT_UA_SITES.
	    if any(domain in host for domain in BOT_UA_SITES):
	        return BOT_USER_AGENT
	    return random.choice(USER_AGENTS)


	def _headers(url: str = "", referer: str = "") -> dict:
	    """Build browser-like request headers for *url*.

	    Args:
	        url: Target URL. When given, the User-Agent is chosen by
	            _ua_for_url; otherwise a random browser User-Agent is used.
	        referer: Optional Referer value. When set it is sent as the Referer
	            header and Sec-Fetch-Site becomes "same-origin" instead of
	            "none".

	    Returns:
	        A new dict of header names to values.
	    """
	    ua = _ua_for_url(url) if url else random.choice(USER_AGENTS)
	    headers = {
	        "User-Agent": ua,
	        # The catch-all media range is "*/*"; the previous value ended in a
	        # bare "/", which is not a valid media range.
	        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
	        "Accept-Language": "en-US,en;q=0.5",
	        # "br" is deliberately not advertised: urllib3 only decodes Brotli
	        # when an optional brotli package is installed, and an undecoded
	        # body would reach the HTML parser as garbage.
	        "Accept-Encoding": "gzip, deflate",
	        "DNT": "1",
	        "Connection": "keep-alive",
	        "Upgrade-Insecure-Requests": "1",
	        "Sec-Fetch-Dest": "document",
	        "Sec-Fetch-Mode": "navigate",
	        "Sec-Fetch-Site": "same-origin" if referer else "none",
	        "Sec-Fetch-User": "?1",
	        "Cache-Control": "max-age=0",
	    }
	    if referer:
	        headers["Referer"] = referer
	    return headers


	# ─────────────────────────────────────────────────────────────
	# SAFE REQUEST (FIXED — accepts any 2xx, not just 200)
	# ─────────────────────────────────────────────────────────────

	def _safe_request(session: requests.Session, url: str, **kwargs) -> requests.Response | None:
	    """Issue a GET request and never raise.

	    Args:
	        session: Session used to send the request.
	        url: Target URL.
	        **kwargs: Forwarded unchanged to ``session.get`` (headers, timeout,
	            allow_redirects, ...).

	    Returns:
	        The Response for any 2xx status (200-299). None for every other
	        status and for any exception raised while sending; the reason is
	        logged at WARNING level.

	    Any 2xx is accepted rather than only 200 because, per this module's
	    change notes, DuckDuckGo answers with 202 while still returning a
	    usable body.
	    """
	    try:
	        # monotonic() is unaffected by wall-clock adjustments, so the elapsed
	        # value cannot come out negative or inflated.
	        started = time.monotonic()
	        response = session.get(url, **kwargs)
	        elapsed = time.monotonic() - started

	        status = response.status_code
	        log.debug("[%s] %s... (%.2fs)", status, url[:80], elapsed)

	        # Accept any 2xx (200 OK, 201 Created, 202 Accepted, 204 No Content, etc.)
	        if 200 <= status < 300:
	            return response
	        if status in (403, 429):
	            log.warning("Blocked [%s]: %s", status, url[:80])
	        else:
	            log.warning("HTTP %s: %s", status, url[:80])

	    # SSLError and ProxyError subclass ConnectionError, so they are listed
	    # first to get their own log message.
	    except requests.exceptions.SSLError as e:
	        log.warning("SSL error for %s: %s", url[:60], e)
	    except requests.exceptions.ProxyError as e:
	        log.warning("Proxy error for %s: %s", url[:60], e)
	    except requests.exceptions.ConnectionError as e:
	        log.warning("Connection failed for %s: %s", url[:60], e)
	    except requests.exceptions.Timeout:
	        log.warning("Timeout for %s", url[:60])
	    except requests.exceptions.TooManyRedirects:
	        log.warning("Redirect loop for %s", url[:60])
	    except requests.exceptions.RequestException as e:
	        log.warning("Request failed for %s: %s", url[:60], e)
	    except Exception as e:
	        # Deliberate catch-all: callers rely on this helper never raising.
	        log.warning("Unexpected error for %s: %s", url[:60], e)

	    return None


	# ─────────────────────────────────────────────────────────────
	# SEARCH ENGINES
	# ─────────────────────────────────────────────────────────────

	def _ddg_decode_href(href: str) -> str:
	    """Resolve a result link from the DuckDuckGo HTML endpoint to its target URL.

	    DuckDuckGo wraps external links as ``/l/?uddg=<percent-encoded target>``
	    (often protocol-relative, ``//duckduckgo.com/l/?uddg=...``).

	    Args:
	        href: Raw ``href`` attribute of a result anchor.

	    Returns:
	        "" for an empty href; the decoded ``uddg`` target when present;
	        otherwise the href itself, made absolute if it was relative.
	    """
	    if not href:
	        return ""

	    if href.startswith("//"):
	        # Protocol-relative: the host is already present, only the scheme
	        # is missing. Prefixing the DDG host here would produce a bogus URL.
	        href = f"https:{href}"
	    elif href.startswith("/"):
	        href = f"https://html.duckduckgo.com{href}"

	    if "uddg=" not in href:
	        return href

	    try:
	        targets = parse_qs(urlparse(href).query).get("uddg")
	    except ValueError:
	        return href
	    # parse_qs has already percent-decoded the value once. Decoding again
	    # would corrupt targets that legitimately contain encoded characters
	    # (e.g. "%2520" must become "%20", not a space).
	    return targets[0] if targets else href


	def _search_ddg_html(session: requests.Session, query: str) -> list[dict] | None:
	    """Engine 1: scrape the DuckDuckGo HTML endpoint (html.duckduckgo.com/html/).

	    Results are read from ``a.result__a`` anchors, with the snippet taken
	    from the ``.result__snippet`` element of the same result wrapper.

	    Args:
	        session: Session used for the HTTP request.
	        query: Search terms (not yet URL-encoded).

	    Returns:
	        Up to MAX_RESULTS dicts with keys "title", "url", "snippet" and
	        "engine", or None when the request fails or nothing usable is found.
	    """
	    url = f"https://html.duckduckgo.com/html/?q={quote_plus(query)}"

	    response = _safe_request(
	        session, url,
	        headers=_headers(url, referer="https://duckduckgo.com/"),
	        timeout=TIMEOUT_SEARCH,
	        allow_redirects=True,
	    )
	    if not response:
	        return None

	    soup = _make_soup(response.text)
	    results = []

	    for anchor in soup.select("a.result__a"):
	        # Apply the cap to accepted results, not raw anchors, so skipped
	        # ads do not shrink the list.
	        if len(results) >= MAX_RESULTS:
	            break

	        title = anchor.get_text(strip=True)
	        raw_href = anchor.get("href", "")
	        href = _ddg_decode_href(raw_href)
	        if not title or not href:
	            continue

	        # Skip DDG's own ad/redirect URLs — they wrap ads as /y.js?ad_domain=...
	        if "duckduckgo.com/y.js" in href or "duckduckgo.com/y.js" in raw_href:
	            continue
	        if not href.startswith(("http://", "https://")):
	            continue

	        # The snippet lives elsewhere inside the same result wrapper.
	        snippet = ""
	        wrapper = anchor.find_parent("div", class_="result") or anchor.find_parent("div")
	        if wrapper:
	            snippet_el = wrapper.select_one(".result__snippet")
	            if snippet_el:
	                snippet = snippet_el.get_text(strip=True)

	        results.append({
	            "title": title,
	            "url": href,
	            "snippet": snippet,
	            "engine": "ddg-html",
	        })

	    return results or None


	def _search_ddg_api(session: requests.Session, query: str) -> list[dict] | None:
	    """Engine 2: DuckDuckGo Instant Answer API (JSON, no scraping).

	    Args:
	        session: Session used for the HTTP request.
	        query: Search terms (not yet URL-encoded).

	    Returns:
	        A list of dicts with keys "title", "url", "snippet" and "engine":
	        at most one entry from the abstract/answer, then up to
	        MAX_RESULTS - 1 related topics. None when the request fails, the
	        body is not a JSON object, or nothing usable is present.
	    """
	    url = f"https://api.duckduckgo.com/?q={quote_plus(query)}&format=json&no_html=1&skip_disambig=1"

	    response = _safe_request(
	        session, url,
	        headers=_headers(url),
	        timeout=TIMEOUT_SEARCH,
	    )
	    if not response:
	        return None

	    try:
	        data = response.json()
	    except Exception:
	        return None
	    # Valid JSON that is not an object (list, null, ...) has no fields to read.
	    if not isinstance(data, dict):
	        return None

	    results = []

	    abstract = (data.get("AbstractText") or data.get("Answer") or "").strip()
	    abstract_url = data.get("AbstractURL", "")
	    if abstract and abstract_url:
	        results.append({
	            # "Heading" can be present but empty; fall back in that case too.
	            "title": data.get("Heading") or "Quick Answer",
	            "url": abstract_url,
	            "snippet": abstract,
	            "engine": "ddg-api",
	        })

	    topic_limit = MAX_RESULTS - 1
	    accepted_topics = 0
	    for topic in data.get("RelatedTopics") or []:
	        # Count accepted topics rather than slicing up front, so entries
	        # that get skipped do not use up the quota.
	        if accepted_topics >= topic_limit:
	            break
	        if not isinstance(topic, dict):
	            continue
	        # Entries lacking Text/FirstURL (e.g. nested topic groups) are skipped.
	        text = (topic.get("Text") or "").strip()
	        first_url = topic.get("FirstURL", "")
	        if not (text and first_url):
	            continue
	        results.append({
	            "title": text.split(" - ")[0] if " - " in text else text[:60],
	            "url": first_url,
	            "snippet": text,
	            "engine": "ddg-api",
	        })
	        accepted_topics += 1

	    return results or None


	def _search_bing(session: requests.Session, query: str) -> list[dict] | None:
	    """Engine 3: scrape Bing's HTML results page.

	    Args:
	        session: Session used for the HTTP request.
	        query: Search terms (not yet URL-encoded).

	    Returns:
	        Up to MAX_RESULTS dicts with keys "title", "url", "snippet" and
	        "engine", or None when the request fails or nothing usable is found.
	    """
	    url = f"https://www.bing.com/search?q={quote_plus(query)}"

	    response = _safe_request(
	        session, url,
	        headers=_headers(url, referer="https://www.bing.com/"),
	        timeout=TIMEOUT_SEARCH + 3,
	        allow_redirects=True,
	    )
	    if not response:
	        return None

	    soup = _make_soup(response.text)
	    results = []

	    for item in soup.select("li.b_algo"):
	        # Cap accepted results, not raw list items.
	        if len(results) >= MAX_RESULTS:
	            break

	        # Prefer the heading link: the first <a> in a result can be a
	        # site-icon/attribution link whose text is not the page title.
	        # Fall back to the first anchor when no heading link exists.
	        anchor = item.select_one("h2 a") or item.select_one("a")
	        if not anchor:
	            continue

	        title = anchor.get_text(strip=True)
	        href = anchor.get("href", "")

	        snippet = ""
	        for selector in ["p", ".b_caption p", "div.b_attribution+div", ".b_snippet"]:
	            el = item.select_one(selector)
	            if el:
	                snippet = el.get_text(strip=True)
	                break

	        if title and href and href.startswith("http"):
	            results.append({
	                "title": title,
	                "url": href,
	                "snippet": snippet,
	                "engine": "bing",
	            })

	    return results or None


	def _search_searxng(session: requests.Session, query: str) -> list[dict] | None:
	    """Engine 4: query public SearXNG instances (last-resort fallback).

	    Up to three randomly chosen instances from SEARXNG_INSTANCES are tried
	    in turn; the first one that yields any usable result wins.

	    Args:
	        session: Session used for the HTTP requests.
	        query: Search terms (not yet URL-encoded).

	    Returns:
	        Up to MAX_RESULTS dicts with keys "title", "url", "snippet" and
	        "engine", or None when no tried instance produced a result.
	    """
	    # sample() picks distinct instances in random order and, with the min()
	    # guard, also works if the list is ever shortened below three.
	    candidates = random.sample(SEARXNG_INSTANCES, min(3, len(SEARXNG_INSTANCES)))

	    for base in candidates:
	        url = f"{base}/search?q={quote_plus(query)}&format=json&language=en"

	        response = _safe_request(
	            session, url,
	            headers=_headers(url, referer=base),
	            timeout=TIMEOUT_SEARCH + 5,
	        )
	        if not response:
	            continue

	        try:
	            data = response.json()
	        except Exception:
	            continue
	        # An instance may answer with JSON that is not an object; skip it
	        # instead of raising.
	        if not isinstance(data, dict):
	            continue

	        # Label results with the first host label, e.g. "searxng-searx".
	        engine_label = f"searxng-{base.split('//')[1].split('.')[0]}"

	        results = []
	        for entry in data.get("results") or []:
	            # Cap accepted results, not raw entries.
	            if len(results) >= MAX_RESULTS:
	                break
	            if not isinstance(entry, dict):
	                continue
	            title = (entry.get("title") or "").strip()
	            href = (entry.get("url") or "").strip()
	            snippet = (entry.get("content") or entry.get("snippet") or "").strip()

	            if title and href:
	                results.append({
	                    "title": title,
	                    "url": href,
	                    "snippet": snippet,
	                    "engine": engine_label,
	                })

	        if results:
	            return results

	    return None


	# ─────────────────────────────────────────────────────────────
	# FETCH URL HELPERS (for fetch_url fallback chain)
	# ─────────────────────────────────────────────────────────────

	def _fetch_direct(url: str, session: requests.Session, ua: str) -> str | None:
	    """Fetch *url* with a specific User-Agent and extract readable text.

	    Args:
	        url: Page to fetch.
	        session: Session used for the HTTP request.
	        ua: User-Agent value; overrides the one chosen by _headers.

	    Returns:
	        - HTML: optional "Title:" line, "URL:" line, separator, then up to
	          10000 characters of extracted text.
	        - text/plain or JSON: "URL:"/"Content-Type:" header, then up to
	          10000 characters of the body as received.
	        - any other content type: a "[Non-HTML content: ...]" marker with a
	          preview of up to 4000 characters.
	        - None when the request fails or no text could be extracted.
	    """
	    resp = _safe_request(
	        session, url,
	        headers={**_headers(url), "User-Agent": ua},
	        timeout=TIMEOUT_FETCH,
	        allow_redirects=True,
	    )
	    if not resp:
	        return None

	    content_type = resp.headers.get("content-type", "").lower()
	    if "text/html" not in content_type and "text/plain" not in content_type and "application/json" not in content_type:
	        # Non-text content — return a short preview.
	        preview = resp.text[:4000] if resp.text else "[binary content]"
	        return f"[Non-HTML content: {content_type}]\n\n{preview}"

	    if "text/html" not in content_type:
	        # Plain text / JSON has no markup to mine. Running it through the
	        # HTML extractor finds no block elements with html.parser and the
	        # content would be dropped, so return it as received.
	        body = resp.text.strip()
	        if not body:
	            return None
	        return f"URL: {url}\nContent-Type: {content_type}\n{'='*60}\n\n{body[:10000]}"

	    soup = _make_soup(resp.text)

	    # Strip noise elements. Calling soup([...]) matches by TAG NAME only.
	    for tag in soup(["script", "style", "nav", "header", "footer", "aside",
	                     "form", "iframe", "noscript", "svg", "canvas",
	                     "advertisement"]):
	        tag.decompose()
	    # The class-based patterns are CSS selectors and need select(); passed
	    # as tag names they never matched anything.
	    for el in soup.select(".ad, .ads, .cookie-banner"):
	        # Skip elements already destroyed with an enclosing match.
	        if getattr(el, "decomposed", False):
	            continue
	        el.decompose()

	    # Prefer semantic content containers.
	    content_blocks = []
	    for selector in ["article", "main", "[role='main']", ".content", ".post", ".entry"]:
	        for el in soup.select(selector):
	            text = el.get_text(" ", strip=True)
	            if len(text) > 300:
	                content_blocks.append(text)

	    # Otherwise fall back to individual headings/paragraphs/list items/cells.
	    if not content_blocks:
	        for tag in soup.find_all(["h1", "h2", "h3", "h4", "p", "li", "td"]):
	            text = tag.get_text(" ", strip=True)
	            if len(text) > 30:
	                content_blocks.append(text)

	    # De-duplicate blocks that share their first 100 characters
	    # (case-insensitive), keeping the first occurrence.
	    seen = set()
	    final_blocks = []
	    for block in content_blocks:
	        signature = block[:100].lower()
	        if signature not in seen:
	            seen.add(signature)
	            final_blocks.append(block)

	    text = "\n\n".join(final_blocks)
	    if not text:
	        return None

	    title_tag = soup.find("title")
	    title = title_tag.get_text(strip=True) if title_tag else ""

	    header = (f"Title: {title}\nURL: {url}\n{'='*60}\n\n"
	              if title else f"URL: {url}\n{'='*60}\n\n")
	    return (header + text[:10000]).strip()


	def _fetch_archive_org(url: str, session: requests.Session) -> str | None:
	    """Last-resort fallback: fetch the closest web.archive.org snapshot of *url*.

	    Asks the Wayback "available" endpoint for the closest snapshot and, if
	    one is reported as available, fetches it with _fetch_direct using the
	    bot User-Agent.

	    Args:
	        url: Original page URL.
	        session: Session used for both HTTP requests.

	    Returns:
	        The text extracted from the snapshot, or None when the lookup
	        fails, the reply is not a JSON object, no snapshot is available,
	        or the snapshot fetch itself fails.
	    """
	    api_url = f"https://archive.org/wayback/available?url={quote_plus(url)}"
	    resp = _safe_request(
	        session, api_url,
	        headers={"User-Agent": BOT_USER_AGENT},
	        timeout=TIMEOUT_FETCH,
	    )
	    if not resp:
	        return None

	    try:
	        data = resp.json()
	    except Exception:
	        return None
	    # Valid JSON that is not an object would make the .get() calls below raise.
	    if not isinstance(data, dict):
	        return None

	    # "archived_snapshots" is read defensively: missing or null at either
	    # level is treated as "no snapshot".
	    snapshots = data.get("archived_snapshots") or {}
	    closest = snapshots.get("closest") or {}
	    archive_url = closest.get("url")
	    if not archive_url or not closest.get("available"):
	        return None

	    # Fetch the archived snapshot.
	    return _fetch_direct(archive_url, session, BOT_USER_AGENT)


	# ─────────────────────────────────────────────────────────────
	# PUBLIC API
	# ─────────────────────────────────────────────────────────────

	def web_search(query: str) -> str:
	    """Search the web using several key-less engines with automatic fallback.

	    Engines are tried in priority order — DDG HTML → DDG API → Bing →
	    SearXNG — and the first one that returns any result wins.

	    Args:
	        query: Search terms; surrounding whitespace is ignored.

	    Returns:
	        One "• title / snippet / 🔗 url" entry per result, separated by
	        blank lines, plus a footer naming the engine and the result count.
	        An "Error: ..." string for an empty query. A "Search failed ..."
	        report when every engine is exhausted. The function never raises
	        on engine failure.
	    """
	    if not query or not query.strip():
	        return "Error: search query cannot be empty."

	    query = query.strip()

	    engines = [
	        ("DuckDuckGo HTML", _search_ddg_html),
	        ("DuckDuckGo API", _search_ddg_api),
	        ("Bing", _search_bing),
	        ("SearXNG", _search_searxng),
	    ]

	    all_errors = []

	    # The context manager closes the session's pooled connections on every
	    # exit path; previously the session was never closed.
	    with _build_session() as session:
	        for name, engine_func in engines:
	            try:
	                log.info("Trying %s for: %s...", name, query[:50])
	                results = engine_func(session, query)

	                if results:
	                    formatted = []
	                    for r in results:
	                        line = f"• {r['title']}"
	                        if r.get("snippet"):
	                            line += f"\n{r['snippet']}"
	                        line += f"\n🔗 {r['url']}"
	                        formatted.append(line)

	                    # Plain "|" separator: the former "\|" was an invalid
	                    # escape sequence that printed a stray backslash.
	                    footer = f"\n\n[Results via {name} | {len(results)} found]"
	                    return "\n\n".join(formatted) + footer

	            except Exception as e:
	                # One broken engine must not stop the fallback chain.
	                log.error("Critical error in %s: %s", name, e)
	                all_errors.append(f"{name}: {e}")
	                continue

	    # All engines failed
	    error_detail = " | ".join(all_errors) if all_errors else "All engines returned no results."
	    return (
	        f"Search failed for: '{query}'\n"
	        f"All fallback engines exhausted.\n"
	        f"Details: {error_detail}\n"
	        f"Tip: Check your internet connection or try again later."
	    )


	def fetch_url(url: str) -> str:
	    """Fetch a webpage as text using per-site handlers and a fallback chain.

	    Strategies, in order:
	        1. Site-specific handler from SUPPORTED_SITES (first match only).
	        2. Direct fetch with the bot User-Agent.
	        3. Direct fetch with a random browser User-Agent.
	        4. web.archive.org cached snapshot (last resort).

	    Args:
	        url: Absolute http:// or https:// URL; surrounding whitespace is
	            ignored.

	    Returns:
	        The text produced by the first strategy that succeeds (the
	        archive.org result is prefixed with a marker line). An
	        "Error: ..." string for an empty or non-HTTP URL. A
	        "Could not fetch URL ..." report when every strategy fails.

	    NOTE(review): the URL is fetched as given; hosts are not filtered
	    (loopback, private ranges, cloud metadata endpoints). Add such a check
	    if callers can pass untrusted URLs.
	    """
	    if not url or not url.strip():
	        return "Error: URL cannot be empty."

	    url = url.strip()
	    if not url.startswith(("http://", "https://")):
	        return "Error: URL must start with http:// or https://"

	    try:
	        netloc = urlparse(url).netloc.lower()
	    except Exception:
	        netloc = ""

	    # The context manager closes the session's pooled connections on every
	    # exit path; previously the session was never closed.
	    with _build_session() as session:
	        # ---- Step 1: Try site-specific handlers (SUPPORTED_SITES) ----
	        for site_pattern, handler in SUPPORTED_SITES:
	            if site_pattern not in netloc:
	                continue
	            try:
	                log.info("Trying site handler for '%s': %s", site_pattern, url[:80])
	                result = handler(url, session)
	                if result:
	                    return result
	            except Exception as e:
	                log.warning("Site handler %s failed: %s", site_pattern, e)
	            # Only the first matching handler is tried; fall through to
	            # the generic strategies below.
	            break

	        # ---- Steps 2-4: generic strategies as (label, prefix, thunk) ----
	        attempts = [
	            ("direct fetch (bot UA)", "",
	             lambda: _fetch_direct(url, session, BOT_USER_AGENT)),
	            ("direct fetch (browser UA)", "",
	             lambda: _fetch_direct(url, session, random.choice(USER_AGENTS))),
	            ("archive.org cache", "[Fetched via archive.org cache]\n\n",
	             lambda: _fetch_archive_org(url, session)),
	        ]
	        for label, prefix, attempt in attempts:
	            log.info("Trying %s: %s", label, url[:80])
	            try:
	                result = attempt()
	            except Exception as e:
	                # Isolate each strategy like step 1 does, so an unexpected
	                # parsing error moves on to the next fallback instead of
	                # escaping to the caller.
	                log.warning("%s failed for %s: %s", label, url[:80], e)
	                continue
	            if result:
	                return f"{prefix}{result}"

	    # All fallbacks exhausted
	    return (
	        f"Could not fetch URL: {url}\n"
	        "All fetch strategies failed (site handler, bot UA, browser UA, archive.org).\n"
	        "The site may require JavaScript, require authentication, or be offline."
	    )


	# ─────────────────────────────────────────────────────────────
	# TEST / DEBUG
	# ─────────────────────────────────────────────────────────────

	if __name__ == "__main__":
	    import logging

	    logging.basicConfig(level=logging.INFO)

	    # Manual smoke test: one search, then one fetch per site handler.
	    # Each entry is (banner text, thunk producing the text to print).
	    rule = "=" * 60
	    demos = (
	        (
	            "TEST: web_search('Python programming language')",
	            lambda: web_search("Python programming language"),
	        ),
	        (
	            "TEST: fetch_url('https://en.wikipedia.org/wiki/Python_(programming_language)')",
	            lambda: fetch_url("https://en.wikipedia.org/wiki/Python_(programming_language)")[:1500],
	        ),
	        (
	            "TEST: fetch_url('https://github.com/python/cpython/blob/main/README.rst')",
	            lambda: fetch_url("https://github.com/python/cpython/blob/main/README.rst")[:1500],
	        ),
	    )
	    for position, (banner, run) in enumerate(demos):
	        # Every section after the first is preceded by a blank line.
	        print(rule if position == 0 else "\n" + rule)
	        print(banner)
	        print(rule)
	        print(run())