Spaces:

islasher
/

ai-meme-generator

Sleeping

ai-meme-generator / news_scanner.py

Update news_scanner.py

8e2d8c3 verified about 1 month ago

1.25 kB

	import requests
	from bs4 import BeautifulSoup

	# def scan_news2():
	# url = "https://www.reuters.com/world/"
	# r = requests.get(url)
	# soup = BeautifulSoup(r.text, "html.parser")

	# # headlines = [h.get_text().strip() for h in soup.select("h3.story-title, h2")]


	# heading_element = soup.find("span", {"data-testid": "TitleHeading"})

	# if heading_element:
	# heading_text = heading_element.get_text(strip=True)
	# print("Heading:", heading_text)
	# else:
	# print("Heading not found")



	# print('SE SUPONE QUE ES EL TITULAR,')
	# print(headlines[:2])
	# return headlines[:5] # top 5 headlines


	def scan_news():
	    """Fetch the ABC News front page and return its first headline-like link.

	    Downloads https://www.abc.net.au/news, walks every ``<a>`` element in
	    document order, and picks the first one whose text contains at least
	    five whitespace-separated words.

	    Returns:
	        list[str]: A one-element list holding that link text (stripped of
	        surrounding whitespace), or an empty list when no link on the page
	        has five or more words.

	    Raises:
	        requests.exceptions.RequestException: If the request fails or does
	            not complete within the timeout.
	    """
	    # Browser-style user agent string sent with the request.
	    headers = {'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.121 Safari/537.36'}

	    # Alternative sources noted in the original code (not used):
	    # "https://www.news.com.au/", "https://www.nytimes.com/"
	    url = "https://www.abc.net.au/news"

	    # Without a timeout a stalled connection would block the caller forever.
	    response = requests.get(url, headers=headers, timeout=10)
	    soup = BeautifulSoup(response.text, "html.parser")

	    min_words = 5
	    for anchor in soup.find_all('a'):
	        text = anchor.get_text().strip()
	        # split() with no argument collapses runs of whitespace, so padding
	        # or line breaks inside the link are not miscounted as words.
	        if len(text.split()) >= min_words:
	            # Only the first match is needed; stop scanning the page here.
	            return [text]
	    return []