NEWS_SCRAPING / scraper.py
sakshi116's picture
Upload 5 files
0f779b0 verified
raw
history blame contribute delete
856 Bytes
import requests
from bs4 import BeautifulSoup
def get_headings(url, timeout=10):
    """Return the text of the first few section headings on a web page.

    The page at ``url`` is downloaded and parsed with the ``html.parser``
    backend; the whitespace-stripped text of at most ten ``<h2>``/``<h3>``
    elements is returned.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout ``requests`` waits indefinitely on a stalled
            connection, which would hang the caller.

    Returns:
        A list of heading strings (possibly empty).

    Raises:
        requests.exceptions.RequestException: If the request fails or
            times out.
    """
    response = requests.get(url, timeout=timeout)
    soup = BeautifulSoup(response.text, 'html.parser')
    # ``limit`` stops the search after ten matches instead of collecting
    # every heading on the page and slicing afterwards.
    heading_tags = soup.find_all(['h2', 'h3'], limit=10)
    return [tag.get_text(strip=True) for tag in heading_tags]
def scrape_news(url, heading=None, timeout=10):
    """Collect basic details for the first few ``<article>`` elements on a page.

    Args:
        url: Address of the page to fetch. Also used as the fallback link
            for articles that contain no usable anchor.
        heading: Not used by this function; kept so existing callers that
            pass it keep working.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout ``requests`` waits indefinitely on a stalled
            connection.

    Returns:
        A list of at most six dicts, one per article, with the keys:

        * ``'title'``   - the article's whitespace-stripped text.
        * ``'summary'`` - a placeholder string built from the first 40
          characters of the title.
        * ``'link'``    - the first anchor's ``href`` resolved against
          ``url``, or ``url`` itself when there is none.
        * ``'image'``   - the first image's ``src`` resolved against
          ``url``, or ``''`` when there is none.

    Raises:
        requests.exceptions.RequestException: If the request fails or
            times out.
    """
    from urllib.parse import urljoin

    response = requests.get(url, timeout=timeout)
    soup = BeautifulSoup(response.text, 'html.parser')

    news = []
    for art in soup.find_all('article', limit=6):
        title = art.get_text(strip=True)

        # Look each tag up once, and read attributes with .get() so a tag
        # that lacks the attribute (e.g. <a name="...">, or a lazy-loaded
        # <img data-src="...">) falls back instead of raising KeyError.
        anchor = art.find('a')
        href = anchor.get('href') if anchor is not None else None
        img = art.find('img')
        src = img.get('src') if img is not None else None

        news.append({
            'title': title,
            'summary': f'Brief summary about {title[:40]}...',
            # Site-relative URLs are useless outside the page, so resolve
            # them against the page address; absolute URLs are unchanged.
            'link': urljoin(url, href) if href else url,
            'image': urljoin(url, src) if src else '',
        })
    return news