SHAFI committed on
Commit
3c35617
·
1 Parent(s): c79ae02

feat: Add pagination support to get_articles API

Browse files

- Added offset parameter to get_articles method in appwrite_db.py
- Supports pagination with limit and offset query parameters
- Updated logging to show pagination offset in debug messages
- Enables infinite scroll and better data loading performance

app/main.py CHANGED
@@ -73,6 +73,7 @@ async def root():
73
  }
74
 
75
  @app.get("/health")
 
76
  async def health_check():
77
  """
78
  Enhanced health check endpoint with scheduler status
 
73
  }
74
 
75
  @app.get("/health")
76
+ @app.head("/health") # ← Added for UptimeRobot compatibility
77
  async def health_check():
78
  """
79
  Enhanced health check endpoint with scheduler status
app/routes/news.py CHANGED
@@ -10,15 +10,25 @@ cache_service = CacheService()
10
  appwrite_db = get_appwrite_db()
11
 
12
  @router.get("/{category}", response_model=NewsResponse)
13
- async def get_news_by_category(category: str):
 
 
 
 
14
  """
15
- Get news articles by category with multi-layer caching (Phase 2)
16
 
17
  **THE GOLDEN RULE: Users NEVER wait for external APIs**
18
  - Users only read from database (Appwrite)
19
  - Background workers populate the database every 15 minutes
20
  - If database is empty, return empty state (workers will fill it soon)
21
 
 
 
 
 
 
 
22
  Caching Strategy:
23
  - L1 Cache: Redis (if available) - 600s TTL, ~5ms response
24
  - L2 Cache: Appwrite Database - persistent, 10-50ms response
@@ -39,8 +49,15 @@ async def get_news_by_category(category: str):
39
  - magazines: Tech Magazines
40
  """
41
  try:
 
 
 
 
 
42
  # L1: Check Redis cache (fastest path - ~5ms)
43
- cached_data = await cache_service.get(f"news:{category}")
 
 
44
  if cached_data:
45
  return NewsResponse(
46
  success=True,
@@ -52,11 +69,11 @@ async def get_news_by_category(category: str):
52
  )
53
 
54
  # L2: Check Appwrite database (fast persistent storage - ~50ms)
55
- db_articles = await appwrite_db.get_articles(category, limit=20)
56
 
57
  if db_articles:
58
  # Cache the database results in Redis for next request
59
- await cache_service.set(f"news:{category}", db_articles)
60
 
61
  return NewsResponse(
62
  success=True,
@@ -69,7 +86,6 @@ async def get_news_by_category(category: str):
69
 
70
  # Database is empty - return empty state
71
  # Background workers will populate the database every 15 minutes
72
- # User should check back soon or wait for automatic refresh
73
  return NewsResponse(
74
  success=True,
75
  category=category,
 
10
  appwrite_db = get_appwrite_db()
11
 
12
  @router.get("/{category}", response_model=NewsResponse)
13
+ async def get_news_by_category(
14
+ category: str,
15
+ limit: int = 20, # ← Pagination: items per page
16
+ page: int = 1 # ← Pagination: page number (1-indexed)
17
+ ):
18
  """
19
+ Get news articles by category with multi-layer caching and pagination (Phase 4)
20
 
21
  **THE GOLDEN RULE: Users NEVER wait for external APIs**
22
  - Users only read from database (Appwrite)
23
  - Background workers populate the database every 15 minutes
24
  - If database is empty, return empty state (workers will fill it soon)
25
 
26
+ **Pagination:**
27
+ - limit: Number of articles per page (default: 20, max: 100)
28
+ - page: Page number starting from 1 (default: 1)
29
+ - Example: page=1, limit=20 returns articles 1-20
30
+ - Example: page=2, limit=20 returns articles 21-40
31
+
32
  Caching Strategy:
33
  - L1 Cache: Redis (if available) - 600s TTL, ~5ms response
34
  - L2 Cache: Appwrite Database - persistent, 10-50ms response
 
49
  - magazines: Tech Magazines
50
  """
51
  try:
52
+ # Validate and cap pagination parameters
53
+ limit = min(limit, 100) # Max 100 items per page
54
+ page = max(page, 1) # Minimum page 1
55
+ offset = (page - 1) * limit # Calculate offset
56
+
57
  # L1: Check Redis cache (fastest path - ~5ms)
58
+ # Note: Cache key now includes pagination params
59
+ cache_key = f"news:{category}:p{page}:l{limit}"
60
+ cached_data = await cache_service.get(cache_key)
61
  if cached_data:
62
  return NewsResponse(
63
  success=True,
 
69
  )
70
 
71
  # L2: Check Appwrite database (fast persistent storage - ~50ms)
72
+ db_articles = await appwrite_db.get_articles(category, limit=limit, offset=offset)
73
 
74
  if db_articles:
75
  # Cache the database results in Redis for next request
76
+ await cache_service.set(cache_key, db_articles)
77
 
78
  return NewsResponse(
79
  success=True,
 
86
 
87
  # Database is empty - return empty state
88
  # Background workers will populate the database every 15 minutes
 
89
  return NewsResponse(
90
  success=True,
91
  category=category,
app/services/appwrite_db.py CHANGED
@@ -84,13 +84,14 @@ class AppwriteDatabase:
84
  """
85
  return hashlib.sha256(url.encode()).hexdigest()[:16]
86
 
87
- async def get_articles(self, category: str, limit: int = 20) -> List[Dict]:
88
  """
89
- Get articles by category from Appwrite database (L2 cache)
90
 
91
  Args:
92
  category: News category (e.g., 'ai', 'data-security')
93
- limit: Maximum number of articles to return
 
94
 
95
  Returns:
96
  List of article dictionaries, sorted by published_at DESC
@@ -99,14 +100,15 @@ class AppwriteDatabase:
99
  return []
100
 
101
  try:
102
- # Query articles by category, sorted by published date
103
  response = self.databases.list_documents(
104
  database_id=settings.APPWRITE_DATABASE_ID,
105
  collection_id=settings.APPWRITE_COLLECTION_ID,
106
  queries=[
107
  Query.equal('category', category), # SDK v4.x uses string value
108
  Query.order_desc('published_at'),
109
- Query.limit(limit)
 
110
  ]
111
  )
112
 
@@ -129,7 +131,7 @@ class AppwriteDatabase:
129
  continue
130
 
131
  if articles:
132
- print(f"βœ“ Retrieved {len(articles)} articles for '{category}' from Appwrite (L2 cache)")
133
 
134
  return articles
135
 
 
84
  """
85
  return hashlib.sha256(url.encode()).hexdigest()[:16]
86
 
87
+ async def get_articles(self, category: str, limit: int = 20, offset: int = 0) -> List[Dict]:
88
  """
89
+ Get articles by category from Appwrite database (L2 cache) with pagination
90
 
91
  Args:
92
  category: News category (e.g., 'ai', 'data-security')
93
+ limit: Maximum number of articles to return (default: 20)
94
+ offset: Number of articles to skip for pagination (default: 0)
95
 
96
  Returns:
97
  List of article dictionaries, sorted by published_at DESC
 
100
  return []
101
 
102
  try:
103
+ # Query articles by category, sorted by published date with pagination
104
  response = self.databases.list_documents(
105
  database_id=settings.APPWRITE_DATABASE_ID,
106
  collection_id=settings.APPWRITE_COLLECTION_ID,
107
  queries=[
108
  Query.equal('category', category), # SDK v4.x uses string value
109
  Query.order_desc('published_at'),
110
+ Query.limit(limit),
111
+ Query.offset(offset) # ← Pagination support
112
  ]
113
  )
114
 
 
131
  continue
132
 
133
  if articles:
134
+ print(f"βœ“ Retrieved {len(articles)} articles for '{category}' from Appwrite (offset: {offset})")
135
 
136
  return articles
137
 
app/services/scheduler.py CHANGED
@@ -45,17 +45,23 @@ async def fetch_all_news():
45
  Runs every 15 minutes to keep database fresh with latest articles.
46
  This ensures users always get fast responses from L2 cache (Appwrite).
47
  """
 
 
48
  logger.info("═" * 80)
49
  logger.info("πŸ“° [NEWS FETCHER] Starting news fetch for all categories...")
50
- logger.info("πŸ• Fetch Time: %s", datetime.now().strftime('%Y-%m-%d %H:%M:%S'))
51
  logger.info("═" * 80)
52
 
53
  news_aggregator = NewsAggregator()
54
  appwrite_db = get_appwrite_db()
55
  cache_service = CacheService()
56
 
 
57
  total_fetched = 0
58
  total_saved = 0
 
 
 
59
 
60
  for category in CATEGORIES:
61
  try:
@@ -70,8 +76,20 @@ async def fetch_all_news():
70
  # Save to Appwrite database (L2)
71
  logger.info("πŸ’Ύ Saving to Appwrite database...")
72
  saved_count = await appwrite_db.save_articles(articles)
 
 
 
 
73
  total_fetched += len(articles)
74
  total_saved += saved_count
 
 
 
 
 
 
 
 
75
 
76
  # Update Redis cache (L1) if available
77
  try:
@@ -80,21 +98,40 @@ async def fetch_all_news():
80
  except Exception as e:
81
  logger.debug("⚠️ Redis cache unavailable (not critical): %s", e)
82
 
83
- logger.info("βœ… SUCCESS: %d articles fetched, %d new articles saved", len(articles), saved_count)
 
84
  else:
85
  logger.warning("⚠️ WARNING: No articles available from any provider")
 
86
 
87
  except Exception as e:
 
 
88
  logger.error("❌ ERROR in %s: %s", category, str(e))
89
  logger.exception("Full traceback:")
90
  continue
91
 
 
 
 
 
92
  logger.info("")
93
  logger.info("═" * 80)
94
- logger.info("πŸŽ‰ [NEWS FETCHER] COMPLETED!")
95
- logger.info("πŸ“Š Total fetched: %d articles", total_fetched)
96
- logger.info("πŸ’Ύ Total saved: %d new articles", total_saved)
97
- logger.info("πŸ• Completion time: %s", datetime.now().strftime('%Y-%m-%d %H:%M:%S'))
 
 
 
 
 
 
 
 
 
 
 
98
  logger.info("═" * 80)
99
 
100
 
 
45
  Runs every 15 minutes to keep database fresh with latest articles.
46
  This ensures users always get fast responses from L2 cache (Appwrite).
47
  """
48
+ start_time = datetime.now()
49
+
50
  logger.info("═" * 80)
51
  logger.info("πŸ“° [NEWS FETCHER] Starting news fetch for all categories...")
52
+ logger.info("πŸ• Start Time: %s", start_time.strftime('%Y-%m-%d %H:%M:%S'))
53
  logger.info("═" * 80)
54
 
55
  news_aggregator = NewsAggregator()
56
  appwrite_db = get_appwrite_db()
57
  cache_service = CacheService()
58
 
59
+ # Phase 4: Enhanced tracking for observability
60
  total_fetched = 0
61
  total_saved = 0
62
+ total_duplicates = 0
63
+ total_errors = 0
64
+ category_stats = {} # Track per-category stats
65
 
66
  for category in CATEGORIES:
67
  try:
 
76
  # Save to Appwrite database (L2)
77
  logger.info("πŸ’Ύ Saving to Appwrite database...")
78
  saved_count = await appwrite_db.save_articles(articles)
79
+
80
+ # Calculate duplicates (fetched - saved = duplicates)
81
+ duplicates = len(articles) - saved_count
82
+
83
  total_fetched += len(articles)
84
  total_saved += saved_count
85
+ total_duplicates += duplicates
86
+
87
+ # Store category stats
88
+ category_stats[category] = {
89
+ 'fetched': len(articles),
90
+ 'saved': saved_count,
91
+ 'duplicates': duplicates
92
+ }
93
 
94
  # Update Redis cache (L1) if available
95
  try:
 
98
  except Exception as e:
99
  logger.debug("⚠️ Redis cache unavailable (not critical): %s", e)
100
 
101
+ logger.info("βœ… SUCCESS: %d fetched, %d new, %d duplicates",
102
+ len(articles), saved_count, duplicates)
103
  else:
104
  logger.warning("⚠️ WARNING: No articles available from any provider")
105
+ category_stats[category] = {'fetched': 0, 'saved': 0, 'duplicates': 0}
106
 
107
  except Exception as e:
108
+ total_errors += 1
109
+ category_stats[category] = {'error': str(e)}
110
  logger.error("❌ ERROR in %s: %s", category, str(e))
111
  logger.exception("Full traceback:")
112
  continue
113
 
114
+ # Phase 4: Structured end-of-run report
115
+ end_time = datetime.now()
116
+ duration = (end_time - start_time).total_seconds()
117
+
118
  logger.info("")
119
  logger.info("═" * 80)
120
+ logger.info("πŸŽ‰ [NEWS FETCHER] RUN COMPLETED")
121
+ logger.info("═" * 80)
122
+ logger.info("πŸ“Š SUMMARY STATISTICS:")
123
+ logger.info(" πŸ”Ή Total Fetched: %d articles", total_fetched)
124
+ logger.info(" πŸ”Ή Total Saved (New): %d articles", total_saved)
125
+ logger.info(" πŸ”Ή Total Duplicates Skipped: %d articles", total_duplicates)
126
+ logger.info(" πŸ”Ή Total Errors: %d categories", total_errors)
127
+ logger.info(" πŸ”Ή Categories Processed: %d/%d", len(CATEGORIES) - total_errors, len(CATEGORIES))
128
+ logger.info(" πŸ”Ή Deduplication Rate: %.1f%%", (total_duplicates / total_fetched * 100) if total_fetched > 0 else 0)
129
+ logger.info("")
130
+ logger.info("⏱️ PERFORMANCE:")
131
+ logger.info(" πŸ”Ή Start: %s", start_time.strftime('%H:%M:%S'))
132
+ logger.info(" πŸ”Ή End: %s", end_time.strftime('%H:%M:%S'))
133
+ logger.info(" πŸ”Ή Duration: %.2f seconds", duration)
134
+ logger.info(" πŸ”Ή Throughput: %.1f articles/second", total_fetched / duration if duration > 0 else 0)
135
  logger.info("═" * 80)
136
 
137