Spaces:
Sleeping
Sleeping
| import feedparser | |
| import pickle | |
| import os | |
| import time | |
| from datetime import datetime | |
| from typing import Tuple, Any, Optional | |
| # Assuming AppConfig is passed in via dependency injection in the refactored main app. | |
def format_published_time(published_parsed: Optional[time.struct_time]) -> str:
    """Format a feedparser time tuple as ``YYYY-MM-DD HH:MM``.

    feedparser documents its ``*_parsed`` dates as 9-tuples in UTC, so the
    date fields are used directly. Round-tripping through ``time.mktime``
    would interpret them as *local* time (with ``tm_isdst=0``) and shift the
    displayed hour whenever daylight-saving time is in effect.

    Args:
        published_parsed: Time tuple taken from a feed entry, or ``None`` /
            empty when the entry carries no date.

    Returns:
        The timestamp formatted with ``'%Y-%m-%d %H:%M'``, or ``'N/A'`` when
        the value is missing or cannot be turned into a valid date.
    """
    if not published_parsed:
        return 'N/A'
    try:
        # Only year..minute are displayed; skipping the seconds field also
        # avoids rejecting leap-second values (tm_sec == 60/61).
        return datetime(*published_parsed[:5]).strftime('%Y-%m-%d %H:%M')
    except (TypeError, ValueError, OverflowError):
        # Malformed or out-of-range tuple: degrade to the placeholder.
        return 'N/A'
def load_feed_from_cache(config: Any) -> Tuple[Optional[Any], str]:
    """Load the pickled feed from ``config.CACHE_FILE`` if it is still fresh.

    Args:
        config: Object exposing ``CACHE_FILE`` (path of the pickle file) and
            ``CACHE_DURATION_SECONDS`` (maximum accepted file age).

    Returns:
        A ``(feed, status_message)`` tuple. ``feed`` is ``None`` when the
        cache file is missing, older than the configured limit, or cannot be
        read/unpickled; the message explains which case occurred.
    """
    cache_path = config.CACHE_FILE
    try:
        # EAFP: ask for the mtime directly instead of checking existence
        # first, so a file that disappears in between is reported as a plain
        # miss rather than as a corrupted cache.
        file_age_seconds = time.time() - os.path.getmtime(cache_path)
        if file_age_seconds > config.CACHE_DURATION_SECONDS:
            return None, (
                f"Cache expired ({file_age_seconds:.0f}s old, "
                f"limit is {config.CACHE_DURATION_SECONDS}s)."
            )

        # SECURITY: pickle.load can execute arbitrary code. This is only safe
        # while the cache file is written exclusively by this application and
        # lives in a location untrusted users cannot write to.
        with open(cache_path, 'rb') as f:
            feed = pickle.load(f)
        return feed, f"Loaded successfully from cache (Age: {file_age_seconds:.0f}s)."
    except (FileNotFoundError, NotADirectoryError):
        return None, "Cache file not found."
    except Exception as e:
        # Any other failure is treated as a cache miss; try to remove the bad
        # file so the next run does not trip over it again.
        print(f"Warning: Failed to load cache file. Deleting corrupted cache. Reason: {e}")
        try:
            os.remove(cache_path)
        except OSError:
            pass  # Best effort only: a leftover file just causes another miss.
        return None, "Cache file corrupted or invalid. Will re-fetch."
def save_feed_to_cache(config: Any, feed: Any) -> None:
    """Pickle ``feed`` into ``config.CACHE_FILE`` without clobbering a good cache.

    The feed is serialized in memory first and then written to a sibling
    temporary file that is atomically renamed over the cache. Opening the
    cache file directly with ``'wb'`` would truncate it before pickling, so an
    unpicklable feed or an interrupted write would leave a corrupt cache.

    Args:
        config: Object exposing ``CACHE_FILE`` (destination path).
        feed: The object to cache; must be picklable.

    Returns:
        None. Failures are reported on stdout and never raised, because
        caching is best-effort.
    """
    cache_path = config.CACHE_FILE
    tmp_path = f"{cache_path}.tmp"
    try:
        # Serialize before touching the disk so pickling errors cannot damage
        # an existing cache file.
        payload = pickle.dumps(feed)
        with open(tmp_path, 'wb') as f:
            f.write(payload)
        # os.replace swaps the file in a single step, so readers see either
        # the old cache or the complete new one.
        os.replace(tmp_path, cache_path)
        print(f"Successfully saved new feed data to cache: {config.CACHE_FILE}")
    except Exception as e:
        print(f"Error saving to cache: {e}")
        try:
            os.remove(tmp_path)
        except OSError:
            pass  # Temp file was never created or is already gone.
def read_hacker_news_rss(config: Any) -> Tuple[Optional[Any], str]:
    """Return the Hacker News RSS feed, served from cache when possible.

    The cache is consulted first via ``load_feed_from_cache``. On a miss the
    feed is fetched from ``config.HN_RSS_URL`` with ``feedparser.parse`` and,
    if it contains entries, stored with ``save_feed_to_cache``.

    Args:
        config: Object exposing ``HN_RSS_URL`` plus the cache settings used
            by the cache helpers.

    Returns:
        A ``(feed, status_message)`` tuple. ``feed`` is the parsed feed object
        on success and ``None`` when the fetch failed, the server answered
        with an HTTP status >= 400, or the feed contained no entries.
    """
    url = config.HN_RSS_URL
    print(f"Attempting to fetch and parse RSS feed from: {url}")
    print("-" * 50)

    # 1. Serve from cache when a fresh copy exists.
    feed, cache_status = load_feed_from_cache(config)
    print(f"Cache Status: {cache_status}")
    if feed is not None:
        return feed, cache_status

    # 2. Cache miss or stale cache: fetch from the network.
    print("Starting network fetch...")
    try:
        feed = feedparser.parse(url)

        # 'status' is only set when an HTTP response was actually received;
        # on connection-level failures it is absent, so read it defensively
        # instead of letting attribute access raise.
        http_status = feed.get('status')
        if http_status is not None and http_status >= 400:
            status_msg = f"Error fetching the feed. HTTP Status: {http_status}"
            print(status_msg)
            return None, status_msg

        if feed.get('bozo'):
            # Bozo is set if any error occurred, even non-critical ones.
            print(f"Warning: Failed to fully parse the feed. Reason: {feed.get('bozo_exception')}")

        if not feed.entries:
            if http_status is None:
                # No HTTP response and nothing parsed: surface the underlying
                # error instead of claiming the fetch succeeded.
                status_msg = (
                    "Error fetching the feed. No HTTP response received. "
                    f"Reason: {feed.get('bozo_exception')}"
                )
            else:
                status_msg = "Fetch successful, but no entries found in the feed."
            print(status_msg)
            return None, status_msg

        # 3. Fetch succeeded with entries: refresh the cache.
        save_feed_to_cache(config, feed)
        return feed, f"Successfully fetched and cached {len(feed.entries)} entries."
    except Exception as e:
        # Boundary handler: callers expect a (None, message) result rather
        # than an exception from the fetch path.
        status_msg = f"An unexpected error occurred during network processing: {e}"
        print(status_msg)
        return None, status_msg
# Manual smoke test: fetch the feed once and show a short preview.
# Not part of the module's API; only runs when the module is executed directly.
if __name__ == '__main__':
    from .config import AppConfig

    feed, status = read_hacker_news_rss(AppConfig)

    if not feed or not feed.entries:
        # Nothing usable came back; report why.
        print(f"Could not fetch the feed. Status: {status}")
    else:
        print(f"\nFetched {len(feed.entries)} entries. Top 3 titles:")
        for top_entry in feed.entries[:3]:
            print(f"- {top_entry.title}")