| import os |
|
|
| from duckduckgo_search import DDGS |
| import requests |
| from bs4 import BeautifulSoup |
|
|
def run(text):
    """Search DuckDuckGo for *text* and scrape the first hit.

    Args:
        text: Search query handed to ``ddg``.

    Returns:
        A ``(page_text, results)`` tuple. ``results`` is whatever ``ddg``
        returned for the query; ``page_text`` is the output of ``bs4`` for
        the ``'href'`` of the first result. When the search produces no
        results, ``page_text`` is an empty string.
    """
    results = ddg(text)
    if not results:
        # No hits means there is no URL to fetch; return an empty page text
        # instead of failing with an IndexError on results[0].
        return "", results
    page_text = bs4(results[0]['href'])
    return page_text, results
|
|
def ddg(text, max_results=5):
    """Run a DuckDuckGo text search and collect the hits into a list.

    Args:
        text: Query string forwarded to ``DDGS.text``.
        max_results: Forwarded to ``DDGS.text`` as ``max_results``.

    Returns:
        A list containing every item produced by ``DDGS.text``.
    """
    with DDGS() as client:
        hits = list(client.text(text, max_results=max_results))
    return hits
|
|
def bs4(url, timeout=10):
    """Download *url* and return its text content, one chunk per line.

    Args:
        url: Address of the page to fetch.
        timeout: Passed to ``requests.get`` as its ``timeout`` (seconds).
            Without one, a server that never answers would block the
            caller indefinitely.

    Returns:
        The text of the parsed HTML with ``<script>`` and ``<style>``
        elements removed, split into whitespace-trimmed, non-empty chunks
        joined by newlines.

    Raises:
        requests.exceptions.RequestException: Propagated from
            ``requests.get`` (for example on connection failure or when
            the timeout elapses).
    """
    html = requests.get(url, timeout=timeout).text
    soup = BeautifulSoup(html, features="html.parser")

    # Script and style bodies are not page text; drop them before extracting.
    for tag in soup(["script", "style"]):
        tag.decompose()

    stripped_lines = (line.strip() for line in soup.get_text().splitlines())
    # NOTE(review): splitting on a single space puts every word on its own
    # output line. The common form of this idiom splits on a double space to
    # separate headline-like runs only - confirm which is intended.
    chunks = (
        phrase.strip()
        for line in stripped_lines
        for phrase in line.split(" ")
    )
    # Empty chunks (blank lines, repeated spaces) are discarded.
    return '\n'.join(chunk for chunk in chunks if chunk)
|
|