File size: 4,329 Bytes
beabfb7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
import feedparser
import pickle
import os
import time
from datetime import datetime
from typing import Tuple, Any, Optional

# Assuming AppConfig is passed in via dependency injection in the refactored main app.

def format_published_time(published_parsed: Optional[time.struct_time]) -> str:
    """Render a feedparser time struct as 'YYYY-MM-DD HH:MM'.

    Returns 'N/A' when the struct is missing or cannot be converted
    (e.g. fields out of range for the platform's mktime).
    """
    if not published_parsed:
        return 'N/A'
    try:
        epoch = time.mktime(published_parsed)
        return datetime.fromtimestamp(epoch).strftime('%Y-%m-%d %H:%M')
    except Exception:
        # Treat any malformed struct the same as a missing one.
        return 'N/A'

def load_feed_from_cache(config: Any) -> Tuple[Optional[Any], str]:
    """Return (feed, status) from the cache file, or (None, reason) on a miss.

    A miss occurs when the file is absent, older than
    ``config.CACHE_DURATION_SECONDS``, or unreadable; an unreadable
    (corrupted) file is deleted best-effort so the next run re-fetches.
    """
    cache_path = config.CACHE_FILE
    if not os.path.exists(cache_path):
        return None, "Cache file not found."

    try:
        # Freshness check first: age is measured from the file's mtime.
        age = time.time() - os.path.getmtime(cache_path)
        if age > config.CACHE_DURATION_SECONDS:
            return None, (
                f"Cache expired ({age:.0f}s old, "
                f"limit is {config.CACHE_DURATION_SECONDS}s)."
            )

        # NOTE: pickle is only safe here because the cache file is
        # written by this same application (see save_feed_to_cache).
        with open(cache_path, 'rb') as fh:
            cached_feed = pickle.load(fh)
        return cached_feed, f"Loaded successfully from cache (Age: {age:.0f}s)."

    except Exception as e:
        # Any failure is treated as a cache miss; clean up best-effort.
        print(f"Warning: Failed to load cache file. Deleting corrupted cache. Reason: {e}")
        try:
            os.remove(cache_path)
        except OSError:
            pass  # Ignore if removal fails
        return None, "Cache file corrupted or invalid. Will re-fetch."

def save_feed_to_cache(config: Any, feed: Any) -> None:
    """Persist *feed* to ``config.CACHE_FILE`` via pickle.

    Failures are reported on stdout and swallowed: caching is
    best-effort and must never break the fetch path.
    """
    try:
        with open(config.CACHE_FILE, 'wb') as fh:
            pickle.dump(feed, fh)
        print(f"Successfully saved new feed data to cache: {config.CACHE_FILE}")
    except Exception as e:
        print(f"Error saving to cache: {e}")

def read_hacker_news_rss(config: Any) -> Tuple[Optional[Any], str]:
    """
    Reads and parses the Hacker News RSS feed, using a cache if available.

    Args:
        config: Object exposing HN_RSS_URL, CACHE_FILE and
            CACHE_DURATION_SECONDS attributes.

    Returns:
        Tuple of (feedparser object or None, human-readable status message).
    """
    url = config.HN_RSS_URL
    print(f"Attempting to fetch and parse RSS feed from: {url}")
    print("-" * 50)

    # 1. Attempt to load from cache
    feed, cache_status = load_feed_from_cache(config)
    print(f"Cache Status: {cache_status}")

    # Cache hit: no network work needed.
    if feed is not None:
        return feed, cache_status

    # 2. Cache miss or stale, fetch from web
    print("Starting network fetch...")
    try:
        # Use feedparser to fetch and parse the feed
        feed = feedparser.parse(url)

        # BUGFIX: the result dict has no 'status' key at all when the
        # server could not be reached (DNS failure, connection refused),
        # so `feed.status` raised and the failure was misreported by the
        # broad handler below as an "unexpected error". Use .get() and
        # only treat a present status >= 400 as an HTTP error.
        http_status = feed.get('status')
        if http_status is not None and http_status >= 400:
            status_msg = f"Error fetching the feed. HTTP Status: {http_status}"
            print(status_msg)
            return None, status_msg

        if feed.bozo:
            # Bozo is set if any error occurred, even non-critical ones.
            print(f"Warning: Failed to fully parse the feed. Reason: {feed.get('bozo_exception')}")

        # 3. If fetch successful, save new data to cache
        if feed.entries:
            save_feed_to_cache(config, feed)
            status_msg = f"Successfully fetched and cached {len(feed.entries)} entries."
        else:
            status_msg = "Fetch successful, but no entries found in the feed."
            print(status_msg)
            feed = None  # Ensure feed is None if no entries

    except Exception as e:
        status_msg = f"An unexpected error occurred during network processing: {e}"
        print(status_msg)
        return None, status_msg

    return feed, status_msg

# Example usage (not part of the refactored module's purpose but good for testing)
if __name__ == '__main__':
    from .config import AppConfig

    fetched, status = read_hacker_news_rss(AppConfig)
    if fetched is not None and fetched.entries:
        print(f"\nFetched {len(fetched.entries)} entries. Top 3 titles:")
        for item in fetched.entries[:3]:
            print(f"- {item.title}")
    else:
        print(f"Could not fetch the feed. Status: {status}")