File size: 1,378 Bytes
796e8b5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
import datetime
import json
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup

# Replace with the correct GUID News URL
URL = "https://www.theguidenews.com/latest-news"  

def scrape_guid_news():
    """Scrape the latest news listing and write the results to ``news.json``.

    Fetches the page at ``URL``, extracts title/link/image/summary from each
    ``div.news-card`` element, stamps each record with the local scrape time,
    and dumps the collected list as pretty-printed UTF-8 JSON.

    Raises:
        requests.HTTPError: if the listing page returns a 4xx/5xx status.
        requests.RequestException: on connection failure or timeout.
    """
    # timeout: without it a dead/slow server hangs the script indefinitely.
    # raise_for_status: fail loudly on an error page instead of parsing it.
    response = requests.get(
        URL, headers={"User-Agent": "Mozilla/5.0"}, timeout=30
    )
    response.raise_for_status()
    # NOTE: "lxml" requires the third-party lxml package; "html.parser" is a
    # stdlib fallback if it is unavailable.
    soup = BeautifulSoup(response.content, "lxml")

    news_list = []
    articles = soup.find_all("div", class_="news-card")  # Adjust class based on HTML structure

    for article in articles:
        try:
            title = article.find("h2").get_text(strip=True)
            # hrefs on listing pages are usually site-relative; resolve them
            # against the listing URL so stored links are always absolute.
            link = urljoin(URL, article.find("a")["href"])
            image_tag = article.find("img")
            # Guard has_attr: lazy-loaded <img> tags may carry no "src",
            # which previously raised KeyError and discarded the article.
            image = (
                urljoin(URL, image_tag["src"])
                if image_tag and image_tag.has_attr("src")
                else None
            )
            summary = article.find("p").get_text(strip=True)
            date = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")

            news_list.append({
                "title": title,
                "link": link,
                "image": image,
                "summary": summary,
                "date": date,
            })
        except Exception as e:
            # Best-effort scrape: one malformed card must not abort the run.
            print(f"Skipping one article due to error: {e}")
            continue

    with open("news.json", "w", encoding="utf-8") as f:
        json.dump(news_list, f, indent=4, ensure_ascii=False)

    print(f"[+] Scraped {len(news_list)} news articles successfully!")

# Run the scraper only when executed as a script, not when imported.
if __name__ == "__main__":
    scrape_guid_news()