Spaces:
Runtime error
Runtime error
| import requests | |
| from string import Template | |
| from bs4 import BeautifulSoup | |
| from dotenv import load_dotenv | |
| from os import getenv | |
| import threading | |
# Pull settings from a local .env file (if any) into the process environment
# before the credentials below are read.
load_dotenv()

# Credentials for the Google Custom Search JSON API; either is None when the
# corresponding environment variable is not set.
google_key = getenv('GOOGLE_KEY')
google_engine = getenv('GOOGLE_ENGINE')

# Search endpoint with the key and engine id filled in once at import time.
# `$query` is left as a string.Template placeholder to be substituted per call.
url = Template(
    'https://www.googleapis.com/customsearch/v1'
    f'?key={google_key}&cx={google_engine}&q=$query'
)
def download_page(url, responses, index, timeout=10):
    """Fetch *url* and store the HTTP response in ``responses[index]``.

    Args:
        url: Address of the page to download.
        responses: Mutable sequence shared with the caller; the slot at
            *index* is overwritten with the ``requests`` response object.
        index: Position in *responses* to write to.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout ``requests.get`` waits indefinitely, so one stalled
            server would block the calling thread forever.

    Raises:
        requests.exceptions.RequestException: If the request fails or times
            out. In that case ``responses[index]`` is left unchanged.
    """
    responses[index] = requests.get(url, timeout=timeout)
def process_page(texts, responses, index):
    """Append the paragraph text of one downloaded page to *texts*.

    The body of ``responses[index]`` is parsed as HTML, every ``<li>``
    element is removed from the tree, and the text of the remaining ``<p>``
    elements is joined with newlines into a single string.

    Args:
        texts: List that receives the extracted text (mutated in place).
        responses: Sequence of downloaded response objects.
        index: Position in *responses* of the page to process.
    """
    page = responses[index]
    document = BeautifulSoup(page.text, 'html.parser')

    # Detach list items first so paragraphs nested inside them are not
    # picked up by the <p> search below.
    for list_item in document.find_all('li'):
        list_item.extract()

    paragraphs = [paragraph.text for paragraph in document.find_all('p')]
    texts.append('\n'.join(paragraphs))
def search_web(query: str) -> list:
    """Search the web for *query* and return the paragraph text of each hit.

    The query is sent to the search endpoint described by the module-level
    ``url`` template. Every result link is then downloaded in its own thread
    via ``download_page``, and each downloaded page is passed to
    ``process_page`` in result order.

    Args:
        query: Free-form search terms.

    Returns:
        A list of strings, one per successfully downloaded result page, in
        the order the search API returned the links. The list is empty when
        the search response contains no ``items``.
    """
    from urllib.parse import quote_plus

    # quote_plus joins the words with '+' exactly like the previous
    # split/join did, but also escapes characters such as '&', '#' or '+'
    # that would otherwise corrupt the query string.
    encoded_query = quote_plus(' '.join(query.split()))
    search_result = requests.get(url.substitute(query=encoded_query)).json()

    # A response without an 'items' key is treated as "no hits" instead of
    # raising KeyError.
    links = [item['link'] for item in search_result.get('items', [])]
    if not links:
        return []

    # Download every page concurrently; each thread writes only to its own
    # slot of `responses`, so no locking is needed.
    responses = [None] * len(links)
    download_threads = [
        threading.Thread(target=download_page, args=(link, responses, index))
        for index, link in enumerate(links)
    ]
    for thread in download_threads:
        thread.start()
    for thread in download_threads:
        thread.join()

    texts = []
    for index, response in enumerate(responses):
        # A slot is still None when its download thread raised; skip it
        # rather than failing on a missing response.
        if response is None:
            continue
        process_page(texts, responses, index)
    return texts