ai-meme-generator / news_scanner.py
islasher's picture
Update news_scanner.py
8e2d8c3 verified
import requests
from bs4 import BeautifulSoup
# def scan_news2():
# url = "https://www.reuters.com/world/"
# r = requests.get(url)
# soup = BeautifulSoup(r.text, "html.parser")
# # headlines = [h.get_text().strip() for h in soup.select("h3.story-title, h2")]
# heading_element = soup.find("span", {"data-testid": "TitleHeading"})
# if heading_element:
# heading_text = heading_element.get_text(strip=True)
# print("Heading:", heading_text)
# else:
# print("Heading not found")
# print('SE SUPONE QUE ES EL TITULAR,')
# print(headlines[:2])
# return headlines[:5] # top 5 headlines
def scan_news():
    """Fetch the ABC News front page and return its first headline-like link.

    The page is downloaded with a browser-like ``user-agent`` header and
    parsed with BeautifulSoup. Every ``<a>`` element is inspected in document
    order; the first one whose visible text contains at least five
    whitespace-separated words is treated as a headline.

    Returns:
        list[str]: A list holding the stripped text of the first matching
        link, or an empty list when no link on the page qualifies.

    Raises:
        requests.RequestException: Propagated from ``requests.get`` when the
            request fails or exceeds the timeout. The HTTP status code is
            not checked, so an error page is parsed like any other response.
    """
    headers = {'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.121 Safari/537.36'}
    # Other sources tried previously: "https://www.news.com.au/", "https://www.nytimes.com/"
    url = "https://www.abc.net.au/news"

    # requests applies no timeout by default; bound the wait so an
    # unresponsive server cannot block the caller forever.
    response = requests.get(url, headers=headers, timeout=10)
    soup = BeautifulSoup(response.text, "html.parser")

    for anchor in soup.find_all('a'):
        text = anchor.get_text().strip()
        # Count real words: split() with no argument collapses runs of
        # spaces/newlines, so markup indentation inside a short navigation
        # link is not miscounted as extra words.
        if len(text.split()) >= 5:
            # Only the first headline is returned, so stop scanning here.
            return [text]
    return []