from typing import Tuple

from utils import ensure_package

# Ensure runtime dependencies. The returned objects are used as the modules
# themselves below (requests.get, mistune.create_markdown).
# NOTE(review): the second argument for beautifulsoup4 looks like the import
# name, which differs from the package name -- confirm in utils.ensure_package.
requests = ensure_package("requests")
bs4 = ensure_package("beautifulsoup4", "bs4")
mistune = ensure_package("mistune")


def scrape_to_markdown(url: str) -> Tuple[str, str]:
    """Fetch a webpage and convert its headings, paragraphs and list items to markdown.

    Only ``h1``-``h3``, ``p`` and ``li`` tags are considered, in document
    order. Headings become ``#``-prefixed lines, list items become ``- ``
    bullets, and paragraphs are emitted as plain text. Tags with no text
    content are skipped so the output contains no empty headings or bullets.

    Args:
        url: Address of the page to download.

    Returns:
        A ``(md_text, html_preview)`` tuple: the generated markdown, and the
        HTML produced by rendering that markdown with mistune.

    Raises:
        requests.HTTPError: If the response carries an error status
            (raised by ``response.raise_for_status()``).
    """
    response = requests.get(url, timeout=10)
    response.raise_for_status()

    # Imported here so the import only runs after the module-level
    # ensure_package("beautifulsoup4", "bs4") call above has completed.
    from bs4 import BeautifulSoup

    soup = BeautifulSoup(response.text, "html.parser")

    # Extract text from main tags
    elements = []
    for tag in soup.find_all(["h1", "h2", "h3", "p", "li"]):
        text = tag.get_text(strip=True)
        if not text:
            # An empty tag would otherwise yield a bare "# " / "- " marker
            # or a blank paragraph in the markdown.
            continue

        if tag.name.startswith("h"):
            # "h2" -> level 2 -> "## ..."
            level = int(tag.name[1])
            elements.append("#" * level + " " + text)
        elif tag.name == "li":
            elements.append(f"- {text}")
        else:
            elements.append(text)

    # Blank line between elements so each one is its own markdown block.
    md_text = "\n\n".join(elements)

    # Convert markdown to HTML for preview
    markdown_parser = mistune.create_markdown(renderer=mistune.HTMLRenderer())
    html_preview = markdown_parser(md_text)

    return md_text, html_preview