Spaces:
Sleeping
Sleeping
| import requests | |
| from bs4 import BeautifulSoup | |
| # def scan_news2(): | |
| # url = "https://www.reuters.com/world/" | |
| # r = requests.get(url) | |
| # soup = BeautifulSoup(r.text, "html.parser") | |
| # # headlines = [h.get_text().strip() for h in soup.select("h3.story-title, h2")] | |
| # heading_element = soup.find("span", {"data-testid": "TitleHeading"}) | |
| # if heading_element: | |
| # heading_text = heading_element.get_text(strip=True) | |
| # print("Heading:", heading_text) | |
| # else: | |
| # print("Heading not found") | |
| # print('SE SUPONE QUE ES EL TITULAR,') | |
| # print(headlines[:2]) | |
| # return headlines[:5] # top 5 headlines | |
def scan_news(url="https://www.abc.net.au/news", limit=1, timeout=10):
    """Fetch a news page and return the text of its first headline-like links.

    Every ``<a>`` element of the page is inspected; its stripped text is kept
    as a headline when it contains at least five whitespace-separated words.
    Shorter link texts are skipped.

    Args:
        url: Page to download. Defaults to the ABC News front page
            (news.com.au and nytimes.com were earlier candidates).
        limit: Maximum number of headlines to return. Defaults to 1.
        timeout: Seconds to wait for the server before ``requests`` gives
            up, so a stalled connection cannot block the caller forever.

    Returns:
        A list of at most ``limit`` headline strings in document order;
        empty when no link text has five or more words.

    Raises:
        requests.exceptions.RequestException: If the request fails or
            times out.
    """
    # Browser-like User-Agent; presumably needed so the site serves the
    # regular page to a script -- TODO confirm.
    headers = {'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.121 Safari/537.36'}
    response = requests.get(url, headers=headers, timeout=timeout)
    soup = BeautifulSoup(response.text, "html.parser")

    headlines = []
    for anchor in soup.find_all('a'):
        text = anchor.get_text().strip()
        # split() with no argument collapses runs of whitespace (including
        # newlines), so only real words are counted.
        if len(text.split()) >= 5:
            headlines.append(text)
    return headlines[:limit]