import requests
from bs4 import BeautifulSoup


def scrape_blog(
    url="https://indianexpress.com/",
    tag="h2",
    *,
    timeout=10,
    min_length=20,
    limit=25,
):
    """Fetch a web page and return the distinct text of its ``tag`` elements.

    The page is downloaded with a browser-like User-Agent header, parsed
    with BeautifulSoup's built-in ``html.parser``, and the stripped text of
    every ``tag`` element longer than ``min_length`` characters is collected.
    Duplicates are dropped while keeping first-seen order.

    Args:
        url: Address of the page to download.
        tag: Name of the HTML element whose text is collected (e.g. ``"h2"``).
        timeout: Seconds passed to ``requests.get`` as its timeout.
        min_length: Only texts strictly longer than this many characters
            are kept.
        limit: Maximum number of titles returned; ``None`` returns all.

    Returns:
        A list of strings. On success, up to ``limit`` distinct titles.
        If nothing matched, a single-element list containing a
        "No <tag> content found" hint. If anything failed (network error,
        HTTP error status, parsing problem), a single-element list
        containing ``"Error: <message>"``. This function reports failures
        through its return value rather than raising.
    """
    headers = {
        "User-Agent": (
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
            "AppleWebKit/537.36 (KHTML, like Gecko) "
            "Chrome/91.0.4472.124 Safari/537.36"
        )
    }

    # Everything stays inside the try block: callers rely on receiving an
    # "Error: ..." list instead of an exception, whatever goes wrong.
    try:
        response = requests.get(url, headers=headers, timeout=timeout)
        response.raise_for_status()

        # When the server sends a text/* Content-Type without a charset,
        # requests decodes ``response.text`` as ISO-8859-1, which garbles
        # UTF-8 pages. In that case pass the raw bytes so BeautifulSoup can
        # detect the encoding from the document itself.
        content_type = response.headers.get("Content-Type", "")
        if "charset" in content_type.lower():
            markup = response.text
        else:
            markup = response.content

        soup = BeautifulSoup(markup, "html.parser")

        texts = (element.get_text(strip=True) for element in soup.find_all(tag))
        titles = [text for text in texts if text and len(text) > min_length]

        # dict.fromkeys removes duplicates while preserving first-seen order.
        clean_titles = list(dict.fromkeys(titles))

        if not clean_titles:
            return [f"No <{tag}> content found. Try another tag."]

        return clean_titles[:limit]
    except Exception as e:
        return [f"Error: {e}"]