Spaces:

simran40
/

webscrapping

Runtime error

File size: 950 Bytes

0f22445

import requests
from bs4 import BeautifulSoup

def scrape_blog(url: str = "https://indianexpress.com/", tag: str = "h2") -> list:
    """Download a page and collect the distinct texts of one kind of element.

    Args:
        url: Address of the page to fetch.
        tag: Name of the HTML element whose text is collected. For string
            values, surrounding whitespace and letter case are ignored.

    Returns:
        A list of strings. On success: up to 25 unique element texts longer
        than 20 characters, in document order. If no element qualifies: a
        one-item list with a "No <tag> content found" hint. If any
        ``Exception`` occurs (bad URL, network failure, HTTP error status,
        parsing problem): a one-item list ``["Error: <message>"]``.
    """
    try:
        # html.parser lower-cases element names, so "H2" or " h2 " would
        # otherwise never match anything.
        if isinstance(tag, str):
            tag = tag.strip().lower()

        # Browser-like User-Agent sent with the request.
        headers = {
            "User-Agent": (
                "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
                "AppleWebKit/537.36 (KHTML, like Gecko) "
                "Chrome/91.0.4472.124 Safari/537.36"
            )
        }
        response = requests.get(url, headers=headers, timeout=10)
        response.raise_for_status()

        # Parse the raw bytes rather than response.text: when the server
        # sends no charset, Requests falls back to ISO-8859-1 for text
        # content, which garbles UTF-8 pages. Given bytes, BeautifulSoup can
        # detect the encoding itself (e.g. from <meta charset>). A charset
        # that the server did declare is still honoured via from_encoding.
        content_type = response.headers.get("Content-Type", "")
        declared = response.encoding if "charset" in content_type.lower() else None
        soup = BeautifulSoup(
            response.content, "html.parser", from_encoding=declared
        )

        texts = (element.get_text(strip=True) for element in soup.find_all(tag))
        # dict.fromkeys drops duplicates while keeping first-seen order.
        clean_titles = list(dict.fromkeys(t for t in texts if len(t) > 20))
        if not clean_titles:
            return [f"No <{tag}> content found. Try another tag."]
        return clean_titles[:25]
    except Exception as e:
        # Deliberately broad: failures are reported as a one-item list
        # instead of being raised, so the return type is always a list.
        return [f"Error: {e}"]