gk2410 committed on
Commit
4e101bd
·
verified ·
1 Parent(s): 43c01d3

Create news_sources.py

Browse files
Files changed (1) hide show
  1. news_sources.py +27 -0
news_sources.py ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import feedparser
2
+ import uuid
3
+ import json
4
+
5
# RSS feed URLs that fetch_recent_articles() parses, in this order.
RSS_FEEDS = [
    "https://rss.nytimes.com/services/xml/rss/nyt/World.xml",
    "https://www.reutersagency.com/feed/?best-topics=natural-disasters&post_type=best",
    "https://www.aljazeera.com/xml/rss/all.xml",
]
10
+
11
def fetch_recent_articles(feeds=None, max_per_feed=5):
    """Collect the leading entries of each RSS feed as plain dicts.

    Args:
        feeds: Iterable of feed URLs to parse. When ``None`` (the default),
            the module-level ``RSS_FEEDS`` list is used.
        max_per_feed: Number of entries taken from the start of each feed's
            entry list. Defaults to 5.

    Returns:
        A list of dicts, one per entry, with the keys:
            ``"id"``: a freshly generated random UUID4 string (a new value
                on every call, so ids are not stable across runs),
            ``"title"``: the entry title, or ``"Untitled"`` when missing,
            ``"content"``: the entry summary, or
                ``"No summary available."`` when missing.
    """
    if feeds is None:
        feeds = RSS_FEEDS

    articles = []
    for url in feeds:
        feed = feedparser.parse(url)
        for entry in feed.entries[:max_per_feed]:
            articles.append({
                "id": str(uuid.uuid4()),
                # Use .get() like the summary below: attribute access on an
                # entry without a title raises AttributeError, which would
                # abort the whole fetch because of a single malformed item.
                "title": entry.get("title", "Untitled"),
                "content": entry.get("summary", "No summary available."),
            })
    return articles
22
+
23
# Script entry point: fetch the articles and store them as JSON so they can
# be embedded later.
if __name__ == "__main__":
    fetched = fetch_recent_articles()
    with open("sample_articles.json", "w") as out_file:
        # Serialise to a string first, then write it in one call.
        out_file.write(json.dumps(fetched, indent=2))