Spaces:

mishrabp
/

mcp-github

Running

mcp-github / src /mcp-web /tools /extract.py

Upload folder using huggingface_hub

29574e5 verified 8 days ago

684 Bytes


	"""
	Extract content from URL
	"""
	import requests
	from bs4 import BeautifulSoup
	import html2text

	def extract_content(url: str) -> str:
	    """Fetch the page at *url* and return its content as plain text.

	    The page is downloaded with a 10-second timeout, ``<script>`` and
	    ``<style>`` elements are removed, and the remaining HTML is converted
	    to text with ``html2text`` (``ignore_links`` enabled).

	    Args:
	        url: Address of the page to download.

	    Returns:
	        The converted text on success. On any failure (invalid URL, network
	        error, HTTP 4xx/5xx status, parsing error) a string of the form
	        ``"Error extracting content: <reason>"`` is returned instead; this
	        function does not raise.
	    """
	    try:
	        response = requests.get(url, timeout=10)
	        # Without this check an HTTP error page (404, 500, ...) would be
	        # parsed and returned as though it were the requested content.
	        response.raise_for_status()

	        soup = BeautifulSoup(response.content, "html.parser")

	        # Script and style bodies are not readable content; drop them
	        # before conversion. decompose() discards the removed nodes.
	        for tag in soup(["script", "style"]):
	            tag.decompose()

	        converter = html2text.HTML2Text()
	        converter.ignore_links = True
	        text = converter.handle(str(soup))
	    except Exception as e:
	        # Deliberately broad: callers receive an error string rather than
	        # an exception, whatever stage failed.
	        return f"Error extracting content: {e}"
	    else:
	        return text