Spaces:
Runtime error
Runtime error
| # Created by Leandro Carneiro at 19/01/2024 | |
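| # Description: helpers that search news links with Google Custom Search, download the text of each page and store the results in a local folder. | |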
| # ------------------------------------------------ | |
| import os.path | |
| import time | |
| from googleapiclient.discovery import build | |
| import requests | |
| from bs4 import BeautifulSoup | |
| import constants | |
def google_search_api(search_term, api_key, cse_id, **kwargs):
    """Run a Google Custom Search query and return its result items.

    Args:
        search_term: Query string sent as the ``q`` parameter.
        api_key: Developer key handed to ``build``.
        cse_id: Custom search engine id sent as the ``cx`` parameter.
        **kwargs: Extra keyword arguments forwarded unchanged to
            ``cse().list`` (for example ``num``).

    Returns:
        The value stored under ``'items'`` in the API response, or ``-1``
        when anything goes wrong, including a response that has no
        ``'items'`` key.
    """
    try:
        service = build("customsearch", "v1", developerKey=api_key)
        response = service.cse().list(q=search_term, cx=cse_id, **kwargs).execute()
        return response['items']
    except Exception as e:
        # The -1 sentinel is kept for existing callers, but the failure is no
        # longer discarded silently. Only the exception type is printed:
        # NOTE(review): the exception text may embed the request URL, which
        # would include the developer key - confirm before printing more.
        print(' Erro na busca do Google: ' + type(e).__name__)
        return -1
def search_google(subject, sites):
    """Collect result links for *subject*, searching one domain at a time.

    For every entry of ``sites`` a ``"<subject> site:<domain>"`` query is sent
    through ``google_search_api`` using the ``GOOGLE_KEY`` and
    ``GOOGLE_SEARCH`` environment variables and ``constants.num_sites`` as the
    ``num`` argument.

    Args:
        subject: Text placed at the start of each query.
        sites: Iterable of domain strings to restrict the search to.

    Returns:
        A list of links. When the search for a domain yields ``-1`` the domain
        itself is appended instead; links containing ``'pdf'``
        (case-insensitive) are reported and left out. If any exception is
        raised, its message is printed and returned as a string.
    """
    try:
        links = []
        for domain in sites:
            print(' Buscando notícias no domínio: ' + domain)
            query = f"{subject} site:{domain}"
            hits = google_search_api(
                query,
                os.environ['GOOGLE_KEY'],
                os.environ['GOOGLE_SEARCH'],
                num=constants.num_sites,
            )
            if hits == -1:
                # Search failed: fall back to the domain itself.
                links.append(domain)
                continue
            for hit in hits:
                url = hit['link']
                if 'pdf' in url.lower():
                    print(' Arquivo PDF encontrado: ' + url)
                else:
                    links.append(url)
        print(' Total de sites encontrados: ' + str(len(links)))
        return links
    except Exception as err:
        message = str(err)
        print(message)
        return message
def retrieve_text_from_site(sites, timeout=30):
    """Download each URL and extract its text content.

    Args:
        sites: Iterable of URL strings to fetch.
        timeout: Value passed as ``timeout`` to ``requests.get``. Defaults
            to 30.

    Returns:
        A list with one string per entry of ``sites``, in the same order:
        the result of ``BeautifulSoup(...).get_text()`` on success, or a
        message starting with ``'Erro na recuperação do texto: '`` followed
        by the exception text when the download or parsing fails.
    """
    result = []
    for site in sites:
        print(' Baixando texto do site: ' + site)
        try:
            # Without an explicit timeout a stalled server would block the
            # whole loop; a timeout surfaces as an exception handled below.
            response = requests.get(site, timeout=timeout)
            response.raise_for_status()
            soup = BeautifulSoup(response.content, 'html.parser')
            result.append(soup.get_text())
        except Exception as e:
            # Record the failure in place so the output keeps exactly one
            # entry per input site.
            result.append('Erro na recuperação do texto: ' + str(e))
    return result
def delete_base(local_base):
    """Remove every entry found directly inside ``local_base``.

    Args:
        local_base: Path of the directory to empty.

    Returns:
        ``0`` when every entry was removed, otherwise the message of the
        first exception raised (entries removed before it stay removed).
    """
    try:
        for entry in os.listdir(local_base):
            target = os.path.join(local_base, entry)
            os.remove(target)
    except Exception as err:
        return str(err)
    return 0
def save_on_base(sites, texts, local_base):
    """Write each text to ``news<i>.txt`` and log its URL in an index file.

    For position ``i`` of ``sites``, ``texts[i]`` is written to
    ``news<i>.txt`` inside ``local_base`` and the line
    ``news<i>.txt;<url>`` is appended to ``filename_url.csv`` in the same
    directory.

    Args:
        sites: Sequence of URL strings.
        texts: Sequence of texts, indexed in step with ``sites``.
        local_base: Directory that receives the files.

    Returns:
        ``0`` on success, otherwise the message of the exception that
        interrupted the process (files written before it are kept).
    """
    try:
        for idx, url in enumerate(sites):
            news_name = f'news{idx}.txt'
            news_path = os.path.join(local_base, news_name)
            with open(news_path, 'w', encoding='utf-8') as news_file:
                news_file.write(texts[idx])
            index_path = os.path.join(local_base, 'filename_url.csv')
            # Append mode: the index accumulates one row per saved file.
            with open(index_path, 'a', encoding='utf-8') as index_file:
                index_file.write(news_name + ';' + url + '\n')
    except Exception as err:
        return str(err)
    return 0