Spaces:

aka7774
/

ddg_bs4

Sleeping

ddg_bs4 / fn.py

Update fn.py

5a2da72 verified almost 2 years ago

978 Bytes

	import os

	from duckduckgo_search import DDGS
	import requests
	from bs4 import BeautifulSoup

	def run(text):
	    """Search for *text* and scrape the page behind the first hit.

	    Args:
	        text: Query string handed to ``ddg``.

	    Returns:
	        A ``(page_text, results)`` tuple. ``results`` is whatever ``ddg``
	        returned; ``page_text`` is the output of ``bs4`` for the ``'href'``
	        of the first result, or ``""`` when the search produced no results.
	    """
	    results = ddg(text)
	    # An empty result set used to crash on results[0] with IndexError;
	    # report "nothing scraped" instead and still hand back the results.
	    if not results:
	        return "", results

	    # Keep the query and the scraped text in separate names rather than
	    # rebinding the `text` parameter.
	    page_text = bs4(results[0]['href'])
	    return page_text, results

	def ddg(text, max_results=5):
	    """Run a DuckDuckGo text search and return the hits as a list.

	    Args:
	        text: Query string passed to ``DDGS.text``.
	        max_results: Forwarded to ``DDGS.text`` as ``max_results``.

	    Returns:
	        A list holding every item yielded by ``DDGS.text``.
	    """
	    with DDGS() as client:
	        # Materialise the hits while the client context is still open.
	        hits = client.text(text, max_results=max_results)
	        return list(hits)

	def bs4(url, timeout=10):
	    """Download *url* and return its visible text, one chunk per line.

	    The page is parsed with BeautifulSoup's ``html.parser``; ``<script>``
	    and ``<style>`` elements are removed before the text is extracted.

	    Args:
	        url: Address of the page to fetch.
	        timeout: Seconds passed to ``requests.get`` as ``timeout``.

	    Returns:
	        The page text with each line stripped, runs separated by two
	        spaces split onto their own lines, and empty chunks dropped,
	        joined with ``"\\n"``.

	    Raises:
	        Any exception raised by ``requests.get`` (including a timeout)
	        propagates to the caller unchanged.
	    """
	    # Without an explicit timeout requests waits indefinitely on a
	    # stalled server, which would hang the caller.
	    html = requests.get(url, timeout=timeout).text
	    soup = BeautifulSoup(html, features="html.parser")

	    # Script and style bodies are not visible text; remove them first.
	    for tag in soup(["script", "style"]):
	        tag.extract()

	    text = soup.get_text()

	    # Strip leading and trailing whitespace on every line.
	    lines = (line.strip() for line in text.splitlines())
	    # Split "multi-headlines" (phrases separated by a double space) into
	    # one chunk each. Splitting on a single space would put every word
	    # on its own line.
	    chunks = (phrase.strip() for line in lines for phrase in line.split("  "))
	    # Drop the empty chunks left by blank lines and repeated separators.
	    return "\n".join(chunk for chunk in chunks if chunk)