SHAFI committed on
Commit
9ffe1f1
Β·
1 Parent(s): 8b90318

feat: Add comprehensive emoji logging for better observability

Browse files

- Enhanced scheduler with detailed status indicators (⏰📰🧹✅❌)
- Added provider failover logging with rate limit detection (😒⏭️)
- Improved database operation visibility (💾📊🗑️)
- Created test utilities for manual verification
- All operations now clearly visible in HF Spaces logs

app/services/appwrite_db.py CHANGED
@@ -59,12 +59,22 @@ class AppwriteDatabase:
59
  self.databases = Databases(self.client)
60
 
61
  self.initialized = True
62
- print(f"βœ“ Appwrite database initialized successfully")
63
- print(f" Database: {settings.APPWRITE_DATABASE_ID}")
64
- print(f" Collection: {settings.APPWRITE_COLLECTION_ID}")
 
 
 
 
65
 
66
  except Exception as e:
67
- print(f"βœ— Appwrite initialization error: {e}")
 
 
 
 
 
 
68
  self.initialized = False
69
 
70
  def _generate_url_hash(self, url: str) -> str:
@@ -189,9 +199,9 @@ class AppwriteDatabase:
189
  continue
190
 
191
  if saved_count > 0:
192
- print(f"βœ“ Saved {saved_count} new articles to Appwrite")
193
  if skipped_count > 0:
194
- print(f" Skipped {skipped_count} duplicate articles")
195
 
196
  return saved_count
197
 
@@ -234,7 +244,9 @@ class AppwriteDatabase:
234
  print(f"Error deleting document {doc['$id']}: {e}")
235
 
236
  if deleted_count > 0:
237
- print(f"βœ“ Deleted {deleted_count} articles older than {days} days")
 
 
238
 
239
  return deleted_count
240
 
 
59
  self.databases = Databases(self.client)
60
 
61
  self.initialized = True
62
+ print("")
63
+ print("βœ“" * 80)
64
+ print("βœ… [Appwrite] Database initialized successfully!")
65
+ print(f"πŸ“Š Database ID: {settings.APPWRITE_DATABASE_ID}")
66
+ print(f"πŸ“‹ Collection ID: {settings.APPWRITE_COLLECTION_ID}")
67
+ print("βœ“" * 80)
68
+ print("")
69
 
70
  except Exception as e:
71
+ print("")
72
+ print("βœ—" * 80)
73
+ print("❌ [Appwrite] Initialization FAILED!")
74
+ print(f"⚠️ Error: {e}")
75
+ print("πŸ’‘ Please check your Appwrite credentials in .env file")
76
+ print("βœ—" * 80)
77
+ print("")
78
  self.initialized = False
79
 
80
  def _generate_url_hash(self, url: str) -> str:
 
199
  continue
200
 
201
  if saved_count > 0:
202
+ print(f"βœ… [Appwrite] Saved {saved_count} new articles to database")
203
  if skipped_count > 0:
204
+ print (f"⏭️ [Appwrite] Skipped {skipped_count} duplicate articles")
205
 
206
  return saved_count
207
 
 
244
  print(f"Error deleting document {doc['$id']}: {e}")
245
 
246
  if deleted_count > 0:
247
+ print(f"βœ… [Appwrite] Deleted {deleted_count} articles older than {days} days")
248
+ else:
249
+ print(f"πŸ“‹ [Appwrite] No old articles to delete")
250
 
251
  return deleted_count
252
 
app/services/news_aggregator.py CHANGED
@@ -73,17 +73,17 @@ class NewsAggregator:
73
 
74
  # Skip if provider is not available (rate limited)
75
  if not provider.is_available():
76
- print(f"Provider {provider_name} is not available (rate limited), trying next...")
77
  self.stats['failover_count'] += 1
78
  continue
79
 
80
  try:
81
- print(f"Fetching news for '{category}' from {provider_name}...")
82
  articles = await provider.fetch_news(category, limit=20)
83
 
84
  # If we got articles, return them
85
  if articles:
86
- print(f"βœ“ Successfully fetched {len(articles)} articles from {provider_name}")
87
 
88
  # Track usage statistics
89
  if provider_name not in self.stats['provider_usage']:
@@ -92,15 +92,15 @@ class NewsAggregator:
92
 
93
  return articles
94
  else:
95
- print(f"Provider {provider_name} returned no articles, trying next...")
96
 
97
  except Exception as e:
98
- print(f"Error with provider {provider_name}: {e}, trying next...")
99
  self.stats['failover_count'] += 1
100
  continue
101
 
102
  # If all providers failed, return empty list
103
- print(f"⚠ All providers exhausted for category '{category}'")
104
  return []
105
 
106
  async def fetch_rss(self, provider: str) -> List[Article]:
 
73
 
74
  # Skip if provider is not available (rate limited)
75
  if not provider.is_available():
76
+ print(f"⏭️ [{provider_name.upper()}] Not available (rate limited), trying next...")
77
  self.stats['failover_count'] += 1
78
  continue
79
 
80
  try:
81
+ print(f"πŸ“‘ [{provider_name.upper()}] Attempting to fetch '{category}' news...")
82
  articles = await provider.fetch_news(category, limit=20)
83
 
84
  # If we got articles, return them
85
  if articles:
86
+ # No need to print here, provider already printed success
87
 
88
  # Track usage statistics
89
  if provider_name not in self.stats['provider_usage']:
 
92
 
93
  return articles
94
  else:
95
+ print(f"⏭️ [{provider_name.upper()}] No articles returned, trying next provider...")
96
 
97
  except Exception as e:
98
+ print(f"❌ [{provider_name.upper()}] Error: {e}, trying next...")
99
  self.stats['failover_count'] += 1
100
  continue
101
 
102
  # If all providers failed, return empty list
103
+ print(f"😞 [NEWS AGGREGATOR] All providers exhausted for '{category}' - no articles available")
104
  return []
105
 
106
  async def fetch_rss(self, provider: str) -> List[Article]:
app/services/news_providers.py CHANGED
@@ -62,7 +62,8 @@ class GNewsProvider(NewsProvider):
62
  'business-analytics': 'business analytics',
63
  'customer-data-platform': 'customer data platform CDP',
64
  'data-centers': 'data centers infrastructure',
65
- 'cloud-computing': 'cloud computing',
 
66
  'magazines': 'technology news',
67
  }
68
 
@@ -86,17 +87,25 @@ class GNewsProvider(NewsProvider):
86
  response = await client.get(url, params=params)
87
 
88
  if response.status_code == 429:
 
89
  self.mark_rate_limited()
90
  return []
91
 
92
  if response.status_code == 200:
93
  self.request_count += 1
94
  data = response.json()
95
- return self._parse_response(data, category)
 
 
 
 
 
 
 
96
 
97
  return []
98
  except Exception as e:
99
- print(f"GNews API error: {e}")
100
  return []
101
 
102
  def _parse_response(self, data: Dict, category: str) -> List[Article]:
@@ -115,7 +124,7 @@ class GNewsProvider(NewsProvider):
115
  )
116
  articles.append(article)
117
  except Exception as e:
118
- print(f"Error parsing GNews article: {e}")
119
  continue
120
  return articles
121
 
@@ -139,7 +148,8 @@ class NewsAPIProvider(NewsProvider):
139
  'business-analytics': '"business analytics" OR analytics',
140
  'customer-data-platform': '"customer data platform" OR CDP',
141
  'data-centers': '"data centers" OR "data centre"',
142
- 'cloud-computing': '"cloud computing" OR cloud',
 
143
  'magazines': 'technology',
144
  }
145
 
@@ -192,7 +202,7 @@ class NewsAPIProvider(NewsProvider):
192
  )
193
  articles.append(article)
194
  except Exception as e:
195
- print(f"Error parsing NewsAPI article: {e}")
196
  continue
197
  return articles
198
 
@@ -216,7 +226,8 @@ class NewsDataProvider(NewsProvider):
216
  'business-analytics': 'business analytics',
217
  'customer-data-platform': 'customer data platform',
218
  'data-centers': 'data centers',
219
- 'cloud-computing': 'cloud computing',
 
220
  'magazines': 'technology',
221
  }
222
 
@@ -239,17 +250,25 @@ class NewsDataProvider(NewsProvider):
239
  response = await client.get(url, params=params)
240
 
241
  if response.status_code == 429:
 
242
  self.mark_rate_limited()
243
  return []
244
 
245
  if response.status_code == 200:
246
  self.request_count += 1
247
  data = response.json()
248
- return self._parse_response(data, category, limit)
 
 
 
 
 
 
 
249
 
250
  return []
251
  except Exception as e:
252
- print(f"NewsData.io error: {e}")
253
  return []
254
 
255
  def _parse_response(self, data: Dict, category: str, limit: int) -> List[Article]:
@@ -268,7 +287,7 @@ class NewsDataProvider(NewsProvider):
268
  )
269
  articles.append(article)
270
  except Exception as e:
271
- print(f"Error parsing NewsData article: {e}")
272
  continue
273
  return articles
274
 
@@ -291,7 +310,8 @@ class GoogleNewsRSSProvider(NewsProvider):
291
  'business-analytics': 'https://news.google.com/rss/search?q=business+analytics&hl=en-US&gl=US&ceid=US:en',
292
  'customer-data-platform': 'https://news.google.com/rss/search?q=customer+data+platform+OR+CDP&hl=en-US&gl=US&ceid=US:en',
293
  'data-centers': 'https://news.google.com/rss/search?q=data+centers+OR+data+centre&hl=en-US&gl=US&ceid=US:en',
294
- 'cloud-computing': 'https://news.google.com/rss/search?q=cloud+computing&hl=en-US&gl=US&ceid=US:en',
 
295
  'magazines': 'https://news.google.com/rss/headlines/section/topic/TECHNOLOGY?hl=en-US&gl=US&ceid=US:en',
296
  }
297
 
@@ -308,15 +328,23 @@ class GoogleNewsRSSProvider(NewsProvider):
308
  response = await client.get(feed_url)
309
 
310
  if response.status_code == 429:
 
311
  self.mark_rate_limited()
312
  return []
313
 
314
  if response.status_code == 200:
315
  self.request_count += 1
316
  parser = RSSParser()
317
- return await parser.parse_google_news(response.text, category)
 
 
 
 
 
 
 
318
 
319
  return []
320
  except Exception as e:
321
- print(f"Google News RSS error: {e}")
322
  return []
 
62
  'business-analytics': 'business analytics',
63
  'customer-data-platform': 'customer data platform CDP',
64
  'data-centers': 'data centers infrastructure',
65
+ 'cloud-computing': 'cloud computing AWS Azure Google Cloud Salesforce Alibaba Cloud Tencent Cloud Huawei Cloud Cloudflare',
66
+ 'medium-article': 'Medium article blog writing publishing',
67
  'magazines': 'technology news',
68
  }
69
 
 
87
  response = await client.get(url, params=params)
88
 
89
  if response.status_code == 429:
90
+ print("😒 [GNews] Rate limit hit! Switching to next provider...")
91
  self.mark_rate_limited()
92
  return []
93
 
94
  if response.status_code == 200:
95
  self.request_count += 1
96
  data = response.json()
97
+ articles = self._parse_response(data, category)
98
+ if articles:
99
+ print(f"βœ… [GNews] Fetched {len(articles)} articles successfully")
100
+ else:
101
+ print("⚠️ [GNews] No articles found in response")
102
+ return articles
103
+ else:
104
+ print(f"❌ [GNews] HTTP {response.status_code} error")
105
 
106
  return []
107
  except Exception as e:
108
+ print(f"❌ [GNews] API error: {e}")
109
  return []
110
 
111
  def _parse_response(self, data: Dict, category: str) -> List[Article]:
 
124
  )
125
  articles.append(article)
126
  except Exception as e:
127
+ print(f"⚠️ [GNews] Error parsing article: {e}")
128
  continue
129
  return articles
130
 
 
148
  'business-analytics': '"business analytics" OR analytics',
149
  'customer-data-platform': '"customer data platform" OR CDP',
150
  'data-centers': '"data centers" OR "data centre"',
151
+ 'cloud-computing': '"cloud computing" OR AWS OR Azure OR "Google Cloud" OR Salesforce OR "Alibaba Cloud" OR "Tencent Cloud" OR "Huawei Cloud" OR Cloudflare',
152
+ 'medium-article': 'Medium OR "Medium article" OR "Medium blog" OR "Medium publishing"',
153
  'magazines': 'technology',
154
  }
155
 
 
202
  )
203
  articles.append(article)
204
  except Exception as e:
205
+ print(f"⚠️ [NewsAPI] Error parsing article: {e}")
206
  continue
207
  return articles
208
 
 
226
  'business-analytics': 'business analytics',
227
  'customer-data-platform': 'customer data platform',
228
  'data-centers': 'data centers',
229
+ 'cloud-computing': 'cloud computing,AWS,Azure,Google Cloud,Salesforce,Alibaba Cloud,Tencent Cloud,Huawei Cloud,Cloudflare',
230
+ 'medium-article': 'Medium,article,blog,writing,publishing',
231
  'magazines': 'technology',
232
  }
233
 
 
250
  response = await client.get(url, params=params)
251
 
252
  if response.status_code == 429:
253
+ print("😒 [NewsData] Rate limit hit! Switching to next provider...")
254
  self.mark_rate_limited()
255
  return []
256
 
257
  if response.status_code == 200:
258
  self.request_count += 1
259
  data = response.json()
260
+ articles = self._parse_response(data, category, limit)
261
+ if articles:
262
+ print(f"βœ… [NewsData] Fetched {len(articles)} articles successfully")
263
+ else:
264
+ print("⚠️ [NewsData] No articles found in response")
265
+ return articles
266
+ else:
267
+ print(f"❌ [NewsData] HTTP {response.status_code} error")
268
 
269
  return []
270
  except Exception as e:
271
+ print(f"❌ [NewsData] error: {e}")
272
  return []
273
 
274
  def _parse_response(self, data: Dict, category: str, limit: int) -> List[Article]:
 
287
  )
288
  articles.append(article)
289
  except Exception as e:
290
+ print(f"⚠️ [NewsData] Error parsing article: {e}")
291
  continue
292
  return articles
293
 
 
310
  'business-analytics': 'https://news.google.com/rss/search?q=business+analytics&hl=en-US&gl=US&ceid=US:en',
311
  'customer-data-platform': 'https://news.google.com/rss/search?q=customer+data+platform+OR+CDP&hl=en-US&gl=US&ceid=US:en',
312
  'data-centers': 'https://news.google.com/rss/search?q=data+centers+OR+data+centre&hl=en-US&gl=US&ceid=US:en',
313
+ 'cloud-computing': 'https://news.google.com/rss/search?q=cloud+computing+OR+AWS+OR+Azure+OR+Google+Cloud+OR+Salesforce+OR+Alibaba+Cloud+OR+Tencent+Cloud+OR+Huawei+Cloud+OR+Cloudflare&hl=en-US&gl=US&ceid=US:en',
314
+ 'medium-article': 'https://news.google.com/rss/search?q=Medium+article+OR+Medium+blog+OR+Medium+publishing&hl=en-US&gl=US&ceid=US:en',
315
  'magazines': 'https://news.google.com/rss/headlines/section/topic/TECHNOLOGY?hl=en-US&gl=US&ceid=US:en',
316
  }
317
 
 
328
  response = await client.get(feed_url)
329
 
330
  if response.status_code == 429:
331
+ print("😒 [Google RSS] Rate limit hit! Trying next provider...")
332
  self.mark_rate_limited()
333
  return []
334
 
335
  if response.status_code == 200:
336
  self.request_count += 1
337
  parser = RSSParser()
338
+ articles = await parser.parse_google_news(response.text, category)
339
+ if articles:
340
+ print(f"βœ… [Google RSS] Fetched {len(articles)} articles successfully")
341
+ else:
342
+ print("⚠️ [Google RSS] No articles found in feed")
343
+ return articles
344
+ else:
345
+ print(f"❌ [Google RSS] HTTP {response.status_code} error")
346
 
347
  return []
348
  except Exception as e:
349
+ print(f"❌ [Google RSS] error: {e}")
350
  return []
app/services/scheduler.py CHANGED
@@ -44,7 +44,10 @@ async def fetch_all_news():
44
  Runs every 15 minutes to keep database fresh with latest articles.
45
  This ensures users always get fast responses from L2 cache (Appwrite).
46
  """
47
- logger.info("πŸ”„ [Background Fetcher] Starting news fetch for all categories...")
 
 
 
48
 
49
  news_aggregator = NewsAggregator()
50
  appwrite_db = get_appwrite_db()
@@ -55,13 +58,16 @@ async def fetch_all_news():
55
 
56
  for category in CATEGORIES:
57
  try:
58
- logger.info(f" Fetching {category}...")
 
 
59
 
60
  # Fetch from external APIs
61
  articles = await news_aggregator.fetch_by_category(category)
62
 
63
  if articles:
64
  # Save to Appwrite database (L2)
 
65
  saved_count = await appwrite_db.save_articles(articles)
66
  total_fetched += len(articles)
67
  total_saved += saved_count
@@ -69,18 +75,26 @@ async def fetch_all_news():
69
  # Update Redis cache (L1) if available
70
  try:
71
  await cache_service.set(f"news:{category}", articles, ttl=settings.CACHE_TTL)
 
72
  except Exception as e:
73
- logger.debug(f" Redis cache update skipped for {category}: {e}")
74
 
75
- logger.info(f" βœ“ {category}: {len(articles)} fetched, {saved_count} saved")
76
  else:
77
- logger.warning(f" βœ— {category}: No articles available")
78
 
79
  except Exception as e:
80
- logger.error(f" βœ— {category}: Error - {e}")
 
81
  continue
82
 
83
- logger.info(f"βœ… [Background Fetcher] Complete! {total_fetched} articles fetched, {total_saved} new articles saved")
 
 
 
 
 
 
84
 
85
 
86
  async def cleanup_old_news():
@@ -90,12 +104,18 @@ async def cleanup_old_news():
90
  Runs daily at midnight to keep Appwrite database within free tier limits.
91
  Only keeps the last 2 days of articles.
92
  """
93
- logger.info("🧹 [Janitor] Starting cleanup of old news articles...")
 
 
 
 
94
 
95
  appwrite_db = get_appwrite_db()
96
 
97
  if not appwrite_db.initialized:
98
- logger.warning(" Appwrite not initialized - skipping cleanup")
 
 
99
  return
100
 
101
  try:
@@ -104,10 +124,12 @@ async def cleanup_old_news():
104
  cutoff_date = datetime.now() - timedelta(hours=retention_hours)
105
  cutoff_iso = cutoff_date.isoformat()
106
 
107
- logger.info(f" Retention policy: {retention_hours} hours")
108
- logger.info(f" Cutoff date: {cutoff_date.strftime('%Y-%m-%d %H:%M:%S')}")
 
109
 
110
  # Query and delete old articles
 
111
  from appwrite.query import Query
112
 
113
  response = appwrite_db.databases.list_documents(
@@ -119,7 +141,12 @@ async def cleanup_old_news():
119
  ]
120
  )
121
 
 
 
122
  deleted_count = 0
 
 
 
123
  for doc in response['documents']:
124
  try:
125
  appwrite_db.databases.delete_document(
@@ -128,32 +155,55 @@ async def cleanup_old_news():
128
  document_id=doc['$id']
129
  )
130
  deleted_count += 1
 
 
131
  except Exception as e:
132
- logger.error(f" Error deleting document {doc['$id']}: {e}")
133
 
134
  # Clear Redis cache to force refresh from updated database
 
135
  cache_service = CacheService()
 
136
  for category in CATEGORIES:
137
  try:
138
  await cache_service.delete(f"news:{category}")
 
139
  except Exception as e:
140
- logger.debug(f" Cache clear skipped for {category}: {e}")
 
 
 
141
 
142
- logger.info(f"βœ… [Janitor] Complete! Deleted {deleted_count} articles older than {retention_hours} hours")
 
 
 
 
 
 
143
 
144
  # If there are more old articles, schedule another cleanup soon
145
  if len(response['documents']) >= 100:
146
- logger.info(f" More old articles detected - will clean up again in next run")
 
147
 
148
  except Exception as e:
149
- logger.error(f"βœ— [Janitor] Cleanup failed: {e}")
 
 
 
 
 
150
 
151
 
152
  def start_scheduler():
153
  """
154
  Initialize and start the background scheduler with all jobs
155
  """
156
- logger.info("⏰ Starting background scheduler...")
 
 
 
157
 
158
  # Job 1: Fetch news every 15 minutes
159
  scheduler.add_job(
@@ -163,7 +213,9 @@ def start_scheduler():
163
  name='News Fetcher (every 15 min)',
164
  replace_existing=True
165
  )
166
- logger.info(" βœ“ Registered: News Fetcher (every 15 minutes)")
 
 
167
 
168
  # Job 2: Cleanup old news daily at midnight (00:00)
169
  scheduler.add_job(
@@ -173,30 +225,51 @@ def start_scheduler():
173
  name='Database Janitor (daily at midnight)',
174
  replace_existing=True
175
  )
176
- logger.info(" βœ“ Registered: Database Janitor (daily at 00:00 UTC)")
 
 
 
177
 
178
  # Start the scheduler
 
 
179
  scheduler.start()
180
- logger.info("βœ… Background scheduler started successfully!")
 
 
 
 
 
181
 
182
 
183
  def shutdown_scheduler():
184
  """
185
  Gracefully shutdown the scheduler
186
  """
187
- logger.info("⏹️ Shutting down background scheduler...")
 
 
 
188
  scheduler.shutdown(wait=True)
189
- logger.info("βœ… Background scheduler shut down successfully")
 
 
190
 
191
 
192
  # Manual job triggers for testing (can be called from admin endpoints)
193
  async def trigger_fetch_now():
194
  """Manually trigger news fetch (for testing)"""
195
- logger.info("πŸ”§ [Manual Trigger] Running fetch job now...")
 
 
 
196
  await fetch_all_news()
197
 
198
 
199
  async def trigger_cleanup_now():
200
  """Manually trigger cleanup (for testing)"""
201
- logger.info("πŸ”§ [Manual Trigger] Running cleanup job now...")
 
 
 
202
  await cleanup_old_news()
 
44
  Runs every 15 minutes to keep database fresh with latest articles.
45
  This ensures users always get fast responses from L2 cache (Appwrite).
46
  """
47
+ logger.info("═" * 80)
48
+ logger.info("πŸ“° [NEWS FETCHER] Starting news fetch for all categories...")
49
+ logger.info("πŸ• Fetch Time: %s", datetime.now().strftime('%Y-%m-%d %H:%M:%S'))
50
+ logger.info("═" * 80)
51
 
52
  news_aggregator = NewsAggregator()
53
  appwrite_db = get_appwrite_db()
 
58
 
59
  for category in CATEGORIES:
60
  try:
61
+ logger.info("")
62
+ logger.info("πŸ“Œ Category: %s", category.upper())
63
+ logger.info("⏳ Fetching from news providers...")
64
 
65
  # Fetch from external APIs
66
  articles = await news_aggregator.fetch_by_category(category)
67
 
68
  if articles:
69
  # Save to Appwrite database (L2)
70
+ logger.info("πŸ’Ύ Saving to Appwrite database...")
71
  saved_count = await appwrite_db.save_articles(articles)
72
  total_fetched += len(articles)
73
  total_saved += saved_count
 
75
  # Update Redis cache (L1) if available
76
  try:
77
  await cache_service.set(f"news:{category}", articles, ttl=settings.CACHE_TTL)
78
+ logger.info("⚑ Redis cache updated")
79
  except Exception as e:
80
+ logger.debug("⚠️ Redis cache unavailable (not critical): %s", e)
81
 
82
+ logger.info("βœ… SUCCESS: %d articles fetched, %d new articles saved", len(articles), saved_count)
83
  else:
84
+ logger.warning("⚠️ WARNING: No articles available from any provider")
85
 
86
  except Exception as e:
87
+ logger.error("❌ ERROR in %s: %s", category, str(e))
88
+ logger.exception("Full traceback:")
89
  continue
90
 
91
+ logger.info("")
92
+ logger.info("═" * 80)
93
+ logger.info("πŸŽ‰ [NEWS FETCHER] COMPLETED!")
94
+ logger.info("πŸ“Š Total fetched: %d articles", total_fetched)
95
+ logger.info("πŸ’Ύ Total saved: %d new articles", total_saved)
96
+ logger.info("πŸ• Completion time: %s", datetime.now().strftime('%Y-%m-%d %H:%M:%S'))
97
+ logger.info("═" * 80)
98
 
99
 
100
  async def cleanup_old_news():
 
104
  Runs daily at midnight to keep Appwrite database within free tier limits.
105
  Only keeps the last 2 days of articles.
106
  """
107
+ logger.info("")
108
+ logger.info("═" * 80)
109
+ logger.info("🧹 [CLEANUP JANITOR] Starting cleanup of old news articles...")
110
+ logger.info("πŸ• Cleanup Time: %s", datetime.now().strftime('%Y-%m-%d %H:%M:%S'))
111
+ logger.info("═" * 80)
112
 
113
  appwrite_db = get_appwrite_db()
114
 
115
  if not appwrite_db.initialized:
116
+ logger.error("❌ CRITICAL: Appwrite database not initialized!")
117
+ logger.error("⚠️ Cleanup cannot proceed - database connection required")
118
+ logger.error("πŸ’‘ Check Appwrite credentials in environment variables")
119
  return
120
 
121
  try:
 
124
  cutoff_date = datetime.now() - timedelta(hours=retention_hours)
125
  cutoff_iso = cutoff_date.isoformat()
126
 
127
+ logger.info("πŸ“‹ Retention Policy: %d hours", retention_hours)
128
+ logger.info("πŸ“… Cutoff Date: %s", cutoff_date.strftime('%Y-%m-%d %H:%M:%S'))
129
+ logger.info("πŸ—‘οΈ Articles published before this will be deleted...")
130
 
131
  # Query and delete old articles
132
+ logger.info("πŸ” Querying Appwrite for old articles...")
133
  from appwrite.query import Query
134
 
135
  response = appwrite_db.databases.list_documents(
 
141
  ]
142
  )
143
 
144
+ logger.info("πŸ“Š Found %d old articles to delete", len(response['documents']))
145
+
146
  deleted_count = 0
147
+ if len(response['documents']) > 0:
148
+ logger.info("πŸ—‘οΈ Deleting articles...")
149
+
150
  for doc in response['documents']:
151
  try:
152
  appwrite_db.databases.delete_document(
 
155
  document_id=doc['$id']
156
  )
157
  deleted_count += 1
158
+ if deleted_count % 10 == 0:
159
+ logger.info(" Progress: %d articles deleted...", deleted_count)
160
  except Exception as e:
161
+ logger.error("❌ Error deleting document %s: %s", doc['$id'], e)
162
 
163
  # Clear Redis cache to force refresh from updated database
164
+ logger.info("πŸ”„ Clearing Redis cache...")
165
  cache_service = CacheService()
166
+ cache_cleared = 0
167
  for category in CATEGORIES:
168
  try:
169
  await cache_service.delete(f"news:{category}")
170
+ cache_cleared += 1
171
  except Exception as e:
172
+ logger.debug("⚠️ Cache clear skipped for %s: %s", category, e)
173
+
174
+ if cache_cleared > 0:
175
+ logger.info("βœ… Cache cleared for %d categories", cache_cleared)
176
 
177
+ logger.info("")
178
+ logger.info("═" * 80)
179
+ logger.info("πŸŽ‰ [CLEANUP JANITOR] COMPLETED!")
180
+ logger.info("πŸ—‘οΈ Total Deleted: %d articles", deleted_count)
181
+ logger.info("⏰ Retention: Articles older than %d hours removed", retention_hours)
182
+ logger.info("πŸ• Completion Time: %s", datetime.now().strftime('%Y-%m-%d %H:%M:%S'))
183
+ logger.info("═" * 80)
184
 
185
  # If there are more old articles, schedule another cleanup soon
186
  if len(response['documents']) >= 100:
187
+ logger.warning("⚠️ WARNING: More old articles detected (100+ limit reached)")
188
+ logger.warning("πŸ“… Additional cleanup will run in next scheduled job")
189
 
190
  except Exception as e:
191
+ logger.error("")
192
+ logger.error("═" * 80)
193
+ logger.error("❌ [CLEANUP JANITOR] FAILED!")
194
+ logger.error("Error: %s", str(e))
195
+ logger.error("═" * 80)
196
+ logger.exception("Full traceback:")
197
 
198
 
199
  def start_scheduler():
200
  """
201
  Initialize and start the background scheduler with all jobs
202
  """
203
+ logger.info("")
204
+ logger.info("═" * 80)
205
+ logger.info("⏰ [SCHEDULER] Initializing background scheduler...")
206
+ logger.info("═" * 80)
207
 
208
  # Job 1: Fetch news every 15 minutes
209
  scheduler.add_job(
 
213
  name='News Fetcher (every 15 min)',
214
  replace_existing=True
215
  )
216
+ logger.info("βœ… Job #1 Registered: πŸ“° News Fetcher")
217
+ logger.info(" ⏱️ Schedule: Every 15 minutes")
218
+ logger.info(" πŸ“‹ Task: Fetch news from all providers and update database")
219
 
220
  # Job 2: Cleanup old news daily at midnight (00:00)
221
  scheduler.add_job(
 
225
  name='Database Janitor (daily at midnight)',
226
  replace_existing=True
227
  )
228
+ logger.info("")
229
+ logger.info("βœ… Job #2 Registered: 🧹 Database Janitor")
230
+ logger.info(" ⏱️ Schedule: Daily at 00:00 UTC")
231
+ logger.info(" πŸ“‹ Task: Delete articles older than 48 hours")
232
 
233
  # Start the scheduler
234
+ logger.info("")
235
+ logger.info("πŸš€ Starting scheduler engine...")
236
  scheduler.start()
237
+ logger.info("")
238
+ logger.info("═" * 80)
239
+ logger.info("βœ… [SCHEDULER] Background scheduler started successfully!")
240
+ logger.info("πŸ”„ All jobs are now active and running")
241
+ logger.info("═" * 80)
242
+ logger.info("")
243
 
244
 
245
  def shutdown_scheduler():
246
  """
247
  Gracefully shutdown the scheduler
248
  """
249
+ logger.info("")
250
+ logger.info("═" * 80)
251
+ logger.info("⏹️ [SCHEDULER] Shutting down background scheduler...")
252
+ logger.info("⏳ Waiting for running jobs to complete...")
253
  scheduler.shutdown(wait=True)
254
+ logger.info("βœ… [SCHEDULER] Background scheduler shut down successfully")
255
+ logger.info("═" * 80)
256
+ logger.info("")
257
 
258
 
259
  # Manual job triggers for testing (can be called from admin endpoints)
260
  async def trigger_fetch_now():
261
  """Manually trigger news fetch (for testing)"""
262
+ logger.info("")
263
+ logger.info("═" * 80)
264
+ logger.info("πŸ”§ [MANUAL TRIGGER] Running fetch job NOW...")
265
+ logger.info("═" * 80)
266
  await fetch_all_news()
267
 
268
 
269
  async def trigger_cleanup_now():
270
  """Manually trigger cleanup (for testing)"""
271
+ logger.info("")
272
+ logger.info("═" * 80)
273
+ logger.info("πŸ”§ [MANUAL TRIGGER] Running cleanup job NOW...")
274
+ logger.info("═" * 80)
275
  await cleanup_old_news()
seed_medium.py ADDED
@@ -0,0 +1,105 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Seed Medium Article Category
3
+ =============================
4
+ One-time script to insert the initial/pinned Medium article into Appwrite database.
5
+
6
+ This ensures the "Medium Article" category starts with a specific guide article.
7
+
8
+ Usage:
9
+ python seed_medium.py
10
+ """
11
+
12
+ from appwrite.client import Client
13
+ from appwrite.services.databases import Databases
14
+ from appwrite.id import ID
15
+ from datetime import datetime
16
+ import os
17
+ from dotenv import load_dotenv
18
+
19
+ # Load environment variables
20
+ load_dotenv()
21
+
22
def seed_medium_article():
    """Insert the seed/pinned Medium guide article into Appwrite.

    Idempotent: if a document with the same URL already exists, the
    existing document is returned unchanged; otherwise a new pinned
    document is created.

    Returns:
        dict: The existing or newly created Appwrite document.

    Raises:
        Exception: Re-raised after logging if any Appwrite call fails.
    """
    # Query builder for the duplicate check; the appwrite package is
    # already a project dependency (see imports at the top of this file).
    from appwrite.query import Query

    # Initialize Appwrite client from environment variables
    client = Client()
    client.set_endpoint(os.getenv('APPWRITE_ENDPOINT', 'https://cloud.appwrite.io/v1'))
    client.set_project(os.getenv('APPWRITE_PROJECT_ID'))
    client.set_key(os.getenv('APPWRITE_API_KEY'))

    # Initialize database service
    databases = Databases(client)

    # Database and collection IDs (fall back to project defaults)
    database_id = os.getenv('APPWRITE_DATABASE_ID', 'segmento_db')
    collection_id = os.getenv('APPWRITE_COLLECTION_ID', 'articles')

    # Article data to insert
    article_data = {
        'title': 'Using RSS feeds of profiles, publications, and topics',
        'description': 'Learn how to use RSS feeds to stay updated with your favorite Medium profiles, publications, and topics. This comprehensive guide covers everything you need to know about accessing and using Medium RSS feeds.',
        'url': 'https://help.medium.com/hc/en-us/articles/214874118-Using-RSS-feeds-of-profiles-publications-and-topics',
        'image': 'https://miro.medium.com/v2/resize:fit:1200/1*F0LADxTtsKOgmPa-_7iRcQ.png',  # Medium logo
        'publishedAt': datetime.now().isoformat(),
        'source': 'Medium Help',
        'category': 'medium-article',
        'isPinned': True,  # Mark as pinned so it always appears first
    }

    try:
        # Duplicate check by URL. Use Query.equal instead of the legacy
        # string syntax f'equal("url", "...")' — the string form is
        # rejected by current Appwrite SDK/server versions and breaks on
        # URLs containing quote characters.
        existing = databases.list_documents(
            database_id=database_id,
            collection_id=collection_id,
            queries=[
                Query.equal('url', article_data['url'])
            ]
        )

        if existing['total'] > 0:
            print("✅ Article already exists in database")
            print(f"   Document ID: {existing['documents'][0]['$id']}")
            return existing['documents'][0]

        # Create the document with a server-generated unique ID
        result = databases.create_document(
            database_id=database_id,
            collection_id=collection_id,
            document_id=ID.unique(),
            data=article_data
        )

        print("✅ Successfully seeded Medium Article!")
        print(f"   Title: {result['title']}")
        print(f"   Document ID: {result['$id']}")
        print(f"   Category: {result['category']}")
        print(f"   Published At: {result['publishedAt']}")

        return result

    except Exception as e:
        print(f"❌ Error seeding article: {str(e)}")
        raise
84
+
85
if __name__ == '__main__':
    print("=" * 60)
    print("Seeding Medium Article Category")
    print("=" * 60)
    print()

    # Fail fast if Appwrite credentials are missing
    required_vars = ['APPWRITE_ENDPOINT', 'APPWRITE_PROJECT_ID', 'APPWRITE_API_KEY']
    missing_vars = [var for var in required_vars if not os.getenv(var)]

    if missing_vars:
        print(f"❌ Missing required environment variables: {', '.join(missing_vars)}")
        print("   Please set them in your .env file")
        # raise SystemExit instead of the bare exit() helper: exit() is
        # injected by the `site` module and is not guaranteed to exist
        # (e.g. when running with `python -S` or under some launchers).
        raise SystemExit(1)

    seed_medium_article()

    print()
    print("=" * 60)
    print("Seeding Complete!")
    print("=" * 60)
test_cleanup.py ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Test script to manually trigger cleanup job
3
+ Run this to test if the cleanup scheduler works with Appwrite credentials
4
+ """
5
+
6
+ import asyncio
7
+ import sys
8
+ import os
9
+
10
+ # Add parent directory to path
11
+ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
12
+
13
async def main():
    """Manually run the cleanup scheduler job and report completion.

    Lets you verify that the Appwrite-backed cleanup works with the
    configured credentials, outside the normal schedule.
    """
    from app.services.scheduler import trigger_cleanup_now

    separator = "=" * 80

    # Header banner
    for line in (separator, "🧪 MANUAL TEST: Cleanup Scheduler", separator, ""):
        print(line)

    # Run the actual cleanup job
    await trigger_cleanup_now()

    # Footer banner
    for line in ("", separator, "✅ Test completed!", separator):
        print(line)


if __name__ == "__main__":
    asyncio.run(main())
test_scheduler_status.py ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Test script to view scheduler status and jobs
3
+ """
4
+
5
+ import asyncio
6
+ import sys
7
+ import os
8
+
9
+ # Add parent directory to path
10
+ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
11
+
12
async def main():
    """Print Appwrite connectivity, scheduler state, and registered jobs."""
    from app.services.scheduler import scheduler
    from app.services.appwrite_db import get_appwrite_db

    bar = "=" * 80
    print(bar)
    print("📊 SCHEDULER STATUS CHECK")
    print(bar)
    print("")

    # Appwrite connection state
    print("🔹 Appwrite Database Status:")
    appwrite_db = get_appwrite_db()
    print(f"   Initialized: {appwrite_db.initialized}")
    print("")

    # Scheduler engine state
    print("🔹 Scheduler Status:")
    print(f"   Running: {scheduler.running}")
    print("")

    # Registered jobs with their next run time and trigger
    print("🔹 Registered Jobs:")
    jobs = scheduler.get_jobs()
    if not jobs:
        print("   No jobs registered")
        print("")
    else:
        for job in jobs:
            print(f"   - {job.name} (ID: {job.id})")
            print(f"     Next run: {job.next_run_time}")
            print(f"     Trigger: {job.trigger}")
            print("")

    print(bar)


if __name__ == "__main__":
    asyncio.run(main())