Spaces:
Sleeping
Sleeping
Update rss_processor.py
Browse files — rss_processor.py: +9 -3
rss_processor.py
CHANGED
|
@@ -46,7 +46,8 @@ def clean_text(text):
|
|
| 46 |
|
| 47 |
def fetch_rss_feeds():
|
| 48 |
articles = []
|
| 49 |
-
|
|
|
|
| 50 |
try:
|
| 51 |
with open(FEEDS_FILE, 'r') as f:
|
| 52 |
feed_categories = json.load(f)
|
|
@@ -69,8 +70,13 @@ def fetch_rss_feeds():
|
|
| 69 |
continue
|
| 70 |
|
| 71 |
for entry in feed.entries[:MAX_ARTICLES_PER_FEED]:
|
| 72 |
-
title = entry.get("title", "No Title")
|
| 73 |
link = entry.get("link", "")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 74 |
description_raw = entry.get("summary", entry.get("description", ""))
|
| 75 |
description = clean_text(description_raw)
|
| 76 |
|
|
@@ -114,7 +120,7 @@ def fetch_rss_feeds():
|
|
| 114 |
except Exception as e:
|
| 115 |
logger.error(f"Error fetching or parsing {feed_url}: {e}")
|
| 116 |
|
| 117 |
-
logger.info(f"Total articles fetched: {len(articles)}")
|
| 118 |
return articles
|
| 119 |
|
| 120 |
def process_and_store_articles(articles):
|
|
|
|
| 46 |
|
| 47 |
def fetch_rss_feeds():
|
| 48 |
articles = []
|
| 49 |
+
seen_links = set()
|
| 50 |
+
|
| 51 |
try:
|
| 52 |
with open(FEEDS_FILE, 'r') as f:
|
| 53 |
feed_categories = json.load(f)
|
|
|
|
| 70 |
continue
|
| 71 |
|
| 72 |
for entry in feed.entries[:MAX_ARTICLES_PER_FEED]:
|
|
|
|
| 73 |
link = entry.get("link", "")
|
| 74 |
+
if not link or link in seen_links:
|
| 75 |
+
continue
|
| 76 |
+
|
| 77 |
+
seen_links.add(link)
|
| 78 |
+
|
| 79 |
+
title = entry.get("title", "No Title")
|
| 80 |
description_raw = entry.get("summary", entry.get("description", ""))
|
| 81 |
description = clean_text(description_raw)
|
| 82 |
|
|
|
|
| 120 |
except Exception as e:
|
| 121 |
logger.error(f"Error fetching or parsing {feed_url}: {e}")
|
| 122 |
|
| 123 |
+
logger.info(f"Total unique articles fetched: {len(articles)}")
|
| 124 |
return articles
|
| 125 |
|
| 126 |
def process_and_store_articles(articles):
|