Spaces:
Running
Running
| import requests | |
| from bs4 import BeautifulSoup | |
def get_headings(url, timeout=10):
    """Return the text of the first ten ``<h2>``/``<h3>`` headings on a page.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A list of at most ten strings, in document order, each being the
        whitespace-stripped text of an ``<h2>`` or ``<h3>`` element.

    Raises:
        requests.exceptions.RequestException: If the request fails or the
            timeout elapses.
    """
    # Without an explicit timeout, requests waits indefinitely on a stalled
    # server and the caller never regains control.
    response = requests.get(url, timeout=timeout)
    soup = BeautifulSoup(response.text, 'html.parser')
    # limit=10 stops the tree walk after ten matches instead of collecting
    # every heading and slicing afterwards.
    return [
        tag.get_text(strip=True)
        for tag in soup.find_all(['h2', 'h3'], limit=10)
    ]
def scrape_news(url, heading=None, timeout=10):
    """Collect up to six ``<article>`` entries from a page.

    Args:
        url: Address of the page to download. Also used as the base for
            resolving relative links and as the fallback link.
        heading: Currently unused; accepted for interface compatibility.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A list of at most six dicts, in document order, with the keys:
        ``'title'`` (all text inside the article, whitespace-stripped),
        ``'summary'`` (a placeholder built from the first 40 characters of
        the title), ``'link'`` (absolute URL of the article's first ``<a>``,
        or ``url`` when there is none) and ``'image'`` (absolute URL of the
        article's first ``<img>``, or ``''`` when there is none).

    Raises:
        requests.exceptions.RequestException: If the request fails or the
            timeout elapses.
    """
    from urllib.parse import urljoin

    # Without an explicit timeout, requests waits indefinitely on a stalled
    # server and the caller never regains control.
    response = requests.get(url, timeout=timeout)
    soup = BeautifulSoup(response.text, 'html.parser')

    news = []
    for art in soup.find_all('article', limit=6):
        title = art.get_text(strip=True)

        # Look each tag up once, and read attributes with .get(): an <a>
        # without href or an <img> without src (common with lazy-loaded
        # images) must not raise KeyError.
        anchor = art.find('a')
        href = anchor.get('href') if anchor is not None else None
        img = art.find('img')
        src = img.get('src') if img is not None else None

        news.append({
            'title': title,
            'summary': f'Brief summary about {title[:40]}...',
            # urljoin leaves absolute URLs untouched and resolves relative
            # ones against the page address so the result is usable as-is.
            'link': urljoin(url, href) if href else url,
            'image': urljoin(url, src) if src else '',
        })
    return news