|
|
import feedparser |
|
|
import pickle |
|
|
import os |
|
|
import time |
|
|
from datetime import datetime |
|
|
from typing import Tuple, Any, Optional |
|
|
|
|
|
|
|
|
|
|
|
def format_published_time(published_parsed: Optional[time.struct_time]) -> str:
    """Safely convert a feedparser time struct to a 'YYYY-MM-DD HH:MM' string.

    Args:
        published_parsed: The ``published_parsed`` struct_time from a feed
            entry, or None when the entry has no parsed date.

    Returns:
        The formatted local-time string, or 'N/A' when the input is missing
        or cannot be converted.
    """
    if not published_parsed:
        return 'N/A'
    try:
        # mktime interprets the struct as local time; fromtimestamp converts
        # the epoch seconds back, so the result renders in local time.
        dt_obj = datetime.fromtimestamp(time.mktime(published_parsed))
        return dt_obj.strftime('%Y-%m-%d %H:%M')
    except (OverflowError, ValueError, TypeError):
        # mktime/fromtimestamp raise OverflowError or ValueError for
        # out-of-range structs; TypeError covers malformed input. Narrowed
        # from a bare `except Exception` so unrelated bugs still surface.
        return 'N/A'
|
|
|
|
|
def load_feed_from_cache(config: Any) -> Tuple[Optional[Any], str]:
    """Load a previously cached feed object, if present and still fresh.

    Args:
        config: Object exposing ``CACHE_FILE`` (path to the pickle file) and
            ``CACHE_DURATION_SECONDS`` (maximum allowed cache age).

    Returns:
        A ``(feed, status_message)`` tuple; ``feed`` is None on any cache
        miss (missing, expired, or unreadable file).
    """
    cache_path = config.CACHE_FILE
    if not os.path.exists(cache_path):
        return None, "Cache file not found."

    try:
        age = time.time() - os.path.getmtime(cache_path)
        if age > config.CACHE_DURATION_SECONDS:
            return None, f"Cache expired ({age:.0f}s old, limit is {config.CACHE_DURATION_SECONDS}s)."

        with open(cache_path, 'rb') as fh:
            cached_feed = pickle.load(fh)
        return cached_feed, f"Loaded successfully from cache (Age: {age:.0f}s)."

    except Exception as e:
        # Best-effort cleanup: an unreadable cache should never be fatal —
        # delete it so the next run falls back to a network fetch.
        print(f"Warning: Failed to load cache file. Deleting corrupted cache. Reason: {e}")
        try:
            os.remove(cache_path)
        except OSError:
            pass
        return None, "Cache file corrupted or invalid. Will re-fetch."
|
|
|
|
|
def save_feed_to_cache(config: Any, feed: Any) -> None:
    """Persist the fetched feed object to the cache file via pickle.

    Failures are logged and swallowed: caching is an optimization, and a
    write error should not abort the fetch that produced the feed.

    Args:
        config: Object exposing ``CACHE_FILE`` (destination path).
        feed: The picklable feed object to store.
    """
    try:
        with open(config.CACHE_FILE, 'wb') as fh:
            pickle.dump(feed, fh)
    except Exception as exc:
        print(f"Error saving to cache: {exc}")
    else:
        print(f"Successfully saved new feed data to cache: {config.CACHE_FILE}")
|
|
|
|
|
def read_hacker_news_rss(config: Any) -> Tuple[Optional[Any], str]:
    """
    Read and parse the Hacker News RSS feed, using a cache if available.

    Args:
        config: Object exposing ``HN_RSS_URL`` plus the cache settings
            consumed by ``load_feed_from_cache`` / ``save_feed_to_cache``.

    Returns:
        A ``(feed, status_message)`` tuple; ``feed`` is the feedparser
        result, or None when fetching failed or yielded no entries.
    """
    url = config.HN_RSS_URL
    print(f"Attempting to fetch and parse RSS feed from: {url}")
    print("-" * 50)

    feed, cache_status = load_feed_from_cache(config)
    print(f"Cache Status: {cache_status}")

    if feed is None:
        print("Starting network fetch...")
        try:
            feed = feedparser.parse(url)

            # feedparser only sets `.status` for HTTP(S) fetches; a local
            # file or raw-string input has no status and would raise
            # AttributeError on `feed.status`, so default to 200.
            http_status = getattr(feed, 'status', 200)
            if http_status >= 400:
                status_msg = f"Error fetching the feed. HTTP Status: {http_status}"
                print(status_msg)
                return None, status_msg

            if feed.bozo:
                # bozo flags malformed XML; entries may still be usable, so
                # warn instead of failing outright.
                print(f"Warning: Failed to fully parse the feed. Reason: {feed.get('bozo_exception')}")

            if feed.entries:
                save_feed_to_cache(config, feed)
                status_msg = f"Successfully fetched and cached {len(feed.entries)} entries."
            else:
                status_msg = "Fetch successful, but no entries found in the feed."
                print(status_msg)
                feed = None

        except Exception as e:
            status_msg = f"An unexpected error occurred during network processing: {e}"
            print(status_msg)
            return None, status_msg

    else:
        status_msg = cache_status

    return feed, status_msg
|
|
|
|
|
|
|
|
if __name__ == '__main__':
    # Support both `python -m package.module` (relative import works) and
    # direct script execution, where a relative import raises ImportError
    # because there is no parent package.
    try:
        from .config import AppConfig
    except ImportError:
        from config import AppConfig

    feed, status = read_hacker_news_rss(AppConfig)
    if feed and feed.entries:
        print(f"\nFetched {len(feed.entries)} entries. Top 3 titles:")
        for entry in feed.entries[:3]:
            print(f"- {entry.title}")
    else:
        print(f"Could not fetch the feed. Status: {status}")
|
|
|