Spaces:
Running
Running
Update rss_processor.py
Browse files
- rss_processor.py +6 -0
rss_processor.py
CHANGED
|
@@ -7,11 +7,13 @@ import logging
|
|
| 7 |
from huggingface_hub import HfApi, login
|
| 8 |
import shutil
|
| 9 |
import rss_feeds
|
|
|
|
| 10 |
# Setup logging
|
| 11 |
logging.basicConfig(level=logging.INFO)
|
| 12 |
logger = logging.getLogger(__name__)
|
| 13 |
|
| 14 |
# Constants
|
|
|
|
| 15 |
LOCAL_DB_DIR = "chroma_db"
|
| 16 |
RSS_FEEDS = rss_feeds.RSS_FEEDS
|
| 17 |
|
|
@@ -36,7 +38,10 @@ def fetch_rss_feeds():
|
|
| 36 |
if feed.bozo:
|
| 37 |
logger.warning(f"Parse error for {feed_url}: {feed.bozo_exception}")
|
| 38 |
continue
|
|
|
|
| 39 |
for entry in feed.entries:
|
|
|
|
|
|
|
| 40 |
title = entry.get("title", "No Title").strip()
|
| 41 |
link = entry.get("link", "").strip()
|
| 42 |
description = entry.get("summary", entry.get("description", "No Description"))
|
|
@@ -54,6 +59,7 @@ def fetch_rss_feeds():
|
|
| 54 |
"category": categorize_feed(feed_url),
|
| 55 |
"image": image,
|
| 56 |
})
|
|
|
|
| 57 |
except Exception as e:
|
| 58 |
logger.error(f"Error fetching {feed_url}: {e}")
|
| 59 |
logger.info(f"Total articles fetched: {len(articles)}")
|
|
|
|
| 7 |
from huggingface_hub import HfApi, login
|
| 8 |
import shutil
|
| 9 |
import rss_feeds
|
| 10 |
+
|
| 11 |
# Setup logging
|
| 12 |
logging.basicConfig(level=logging.INFO)
|
| 13 |
logger = logging.getLogger(__name__)
|
| 14 |
|
| 15 |
# Constants
|
| 16 |
+
MAX_ARTICLES_PER_FEED = 5 # Set to 5 for testing, increase later as needed
|
| 17 |
LOCAL_DB_DIR = "chroma_db"
|
| 18 |
RSS_FEEDS = rss_feeds.RSS_FEEDS
|
| 19 |
|
|
|
|
| 38 |
if feed.bozo:
|
| 39 |
logger.warning(f"Parse error for {feed_url}: {feed.bozo_exception}")
|
| 40 |
continue
|
| 41 |
+
article_count = 0
|
| 42 |
for entry in feed.entries:
|
| 43 |
+
if article_count >= MAX_ARTICLES_PER_FEED:
|
| 44 |
+
break
|
| 45 |
title = entry.get("title", "No Title").strip()
|
| 46 |
link = entry.get("link", "").strip()
|
| 47 |
description = entry.get("summary", entry.get("description", "No Description"))
|
|
|
|
| 59 |
"category": categorize_feed(feed_url),
|
| 60 |
"image": image,
|
| 61 |
})
|
| 62 |
+
article_count += 1
|
| 63 |
except Exception as e:
|
| 64 |
logger.error(f"Error fetching {feed_url}: {e}")
|
| 65 |
logger.info(f"Total articles fetched: {len(articles)}")
|