Spaces:

sakshi116
/

NEWS_SCRAPING

Running

NEWS_SCRAPING / scraper.py

Upload 5 files

0f779b0 verified 2 months ago

856 Bytes

	import requests
	from bs4 import BeautifulSoup

	def get_headings(url, *, timeout=10):
	    """Return the text of the first ten ``<h2>``/``<h3>`` elements at *url*.

	    Args:
	        url: Address of the page to download and parse.
	        timeout: Seconds to wait for the server before giving up; passed
	            straight to ``requests.get``.

	    Returns:
	        A list of at most ten strings, one per heading, with surrounding
	        whitespace stripped.

	    Raises:
	        requests.exceptions.RequestException: If the page cannot be
	            fetched (connection failure, timeout, invalid URL, ...).

	    Note:
	        The HTTP status code is not checked, so the body of an error
	        response is parsed like any other page.
	    """
	    # Without an explicit timeout, requests can wait forever on a host
	    # that accepts the connection but never answers.
	    response = requests.get(url, timeout=timeout)
	    soup = BeautifulSoup(response.text, 'html.parser')
	    return [h.get_text(strip=True) for h in soup.find_all(['h2', 'h3'])[:10]]

	def scrape_news(url, heading=None, *, timeout=10):
	    """Collect up to six ``<article>`` elements from *url* as news items.

	    Args:
	        url: Address of the page to download and parse.
	        heading: Currently unused; kept so existing callers that pass it
	            keep working.
	        timeout: Seconds to wait for the server before giving up; passed
	            straight to ``requests.get``.

	    Returns:
	        A list of dicts, one per article, with the keys:

	        * ``'title'``   - all text inside the article, whitespace-stripped.
	        * ``'summary'`` - a placeholder sentence built from the first 40
	          characters of the title.
	        * ``'link'``    - absolute URL of the first anchor that has an
	          ``href``; falls back to *url* when there is none.
	        * ``'image'``   - absolute URL of the first image that has a
	          non-empty ``src``; ``''`` when there is none.

	    Raises:
	        requests.exceptions.RequestException: If the page cannot be
	            fetched (connection failure, timeout, invalid URL, ...).
	    """
	    from urllib.parse import urljoin

	    # Without an explicit timeout, requests can wait forever on a host
	    # that accepts the connection but never answers.
	    response = requests.get(url, timeout=timeout)
	    soup = BeautifulSoup(response.text, 'html.parser')

	    news = []
	    for art in soup.find_all('article')[:6]:
	        title = art.get_text(strip=True)

	        # Only consider tags that actually carry the attribute: indexing a
	        # bare <a> or a lazy-loaded <img> without src would raise KeyError.
	        anchor = art.find('a', href=True)
	        img = art.find('img', src=True)

	        # Resolve relative references against the page URL so the results
	        # are usable outside the context of the scraped site.
	        link = urljoin(url, anchor['href']) if anchor is not None else url
	        if img is not None and img['src']:
	            image = urljoin(url, img['src'])
	        else:
	            image = ''

	        news.append({
	            'title': title,
	            'summary': f'Brief summary about {title[:40]}...',
	            'link': link,
	            'image': image,
	        })
	    return news