SHAFI committed on
Commit ·
f0a6b9e
1
Parent(s): 1c9809b
chore: Sync backend code with latest changes
Browse files
- Updated database cleanup and maintenance scripts
- Maintained scheduler service configuration
- Backend ready for production deployment
- app/services/appwrite_db.py +3 -2
- app/services/scheduler.py +1 -0
- check_database_status.py +194 -0
app/services/appwrite_db.py
CHANGED
|
@@ -276,8 +276,9 @@ class AppwriteDatabase:
|
|
| 276 |
# Get counts by category
|
| 277 |
categories = [
|
| 278 |
"ai", "data-security", "data-governance", "data-privacy",
|
| 279 |
-
"data-engineering", "
|
| 280 |
-
"customer-data-platform", "data-centers",
|
|
|
|
| 281 |
]
|
| 282 |
|
| 283 |
articles_by_category = {}
|
|
|
|
| 276 |
# Get counts by category
|
| 277 |
categories = [
|
| 278 |
"ai", "data-security", "data-governance", "data-privacy",
|
| 279 |
+
"data-engineering", "data-management", "business-intelligence",
|
| 280 |
+
"business-analytics", "customer-data-platform", "data-centers",
|
| 281 |
+
"cloud-computing", "magazines"
|
| 282 |
]
|
| 283 |
|
| 284 |
articles_by_category = {}
|
app/services/scheduler.py
CHANGED
|
@@ -28,6 +28,7 @@ CATEGORIES = [
|
|
| 28 |
"data-governance",
|
| 29 |
"data-privacy",
|
| 30 |
"data-engineering",
|
|
|
|
| 31 |
"business-intelligence",
|
| 32 |
"business-analytics",
|
| 33 |
"customer-data-platform",
|
|
|
|
| 28 |
"data-governance",
|
| 29 |
"data-privacy",
|
| 30 |
"data-engineering",
|
| 31 |
+
"data-management",
|
| 32 |
"business-intelligence",
|
| 33 |
"business-analytics",
|
| 34 |
"customer-data-platform",
|
check_database_status.py
ADDED
|
@@ -0,0 +1,194 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Database Status Checker
|
| 3 |
+
Provides comprehensive diagnostics for Appwrite database and scheduler status
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
import asyncio
|
| 7 |
+
import sys
|
| 8 |
+
import os
|
| 9 |
+
from datetime import datetime, timedelta
|
| 10 |
+
|
| 11 |
+
# Add parent directory to path
|
| 12 |
+
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
| 13 |
+
|
| 14 |
+
async def main():
|
| 15 |
+
from app.services.appwrite_db import get_appwrite_db
|
| 16 |
+
from app.services.scheduler import scheduler
|
| 17 |
+
from app.config import settings
|
| 18 |
+
from appwrite.query import Query
|
| 19 |
+
|
| 20 |
+
print("=" * 80)
|
| 21 |
+
print("π DATABASE & SCHEDULER STATUS REPORT")
|
| 22 |
+
print(f"📅 Generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
|
| 23 |
+
print("=" * 80)
|
| 24 |
+
print("")
|
| 25 |
+
|
| 26 |
+
# ===== PART 1: Appwrite Connection =====
|
| 27 |
+
print("β" * 80)
|
| 28 |
+
print("π PART 1: APPWRITE DATABASE CONNECTION")
|
| 29 |
+
print("β" * 80)
|
| 30 |
+
|
| 31 |
+
appwrite_db = get_appwrite_db()
|
| 32 |
+
print(f"β Initialized: {appwrite_db.initialized}")
|
| 33 |
+
print(f"β Endpoint: {settings.APPWRITE_ENDPOINT}")
|
| 34 |
+
print(f"β Database ID: {settings.APPWRITE_DATABASE_ID}")
|
| 35 |
+
print(f"β Collection ID: {settings.APPWRITE_COLLECTION_ID}")
|
| 36 |
+
print("")
|
| 37 |
+
|
| 38 |
+
if not appwrite_db.initialized:
|
| 39 |
+
print("β ERROR: Appwrite is not initialized!")
|
| 40 |
+
print("💡 Check your .env file for valid credentials")
|
| 41 |
+
return
|
| 42 |
+
|
| 43 |
+
# ===== PART 2: Total Article Count =====
|
| 44 |
+
print("β" * 80)
|
| 45 |
+
print("π PART 2: TOTAL ARTICLE COUNT")
|
| 46 |
+
print("β" * 80)
|
| 47 |
+
|
| 48 |
+
try:
|
| 49 |
+
stats = await appwrite_db.get_stats()
|
| 50 |
+
total_articles = stats.get('total_articles', 0)
|
| 51 |
+
|
| 52 |
+
print(f"🔢 TOTAL ARTICLES IN DATABASE: {total_articles:,}")
|
| 53 |
+
print("")
|
| 54 |
+
|
| 55 |
+
# ===== PART 3: Articles by Category =====
|
| 56 |
+
print("β" * 80)
|
| 57 |
+
print("π PART 3: ARTICLES BY CATEGORY")
|
| 58 |
+
print("β" * 80)
|
| 59 |
+
|
| 60 |
+
articles_by_category = stats.get('articles_by_category', {})
|
| 61 |
+
for category, count in sorted(articles_by_category.items(), key=lambda x: x[1], reverse=True):
|
| 62 |
+
print(f" {category:30s} : {count:6,} articles")
|
| 63 |
+
print("")
|
| 64 |
+
|
| 65 |
+
except Exception as e:
|
| 66 |
+
print(f"β Error getting stats: {e}")
|
| 67 |
+
print("")
|
| 68 |
+
|
| 69 |
+
# ===== PART 4: Age Analysis =====
|
| 70 |
+
print("β" * 80)
|
| 71 |
+
print("⏰ PART 4: ARTICLE AGE ANALYSIS")
|
| 72 |
+
print("β" * 80)
|
| 73 |
+
|
| 74 |
+
try:
|
| 75 |
+
# Articles older than 48 hours (should be deleted by cleanup)
|
| 76 |
+
cutoff_48h = (datetime.now() - timedelta(hours=48)).isoformat()
|
| 77 |
+
response_48h = appwrite_db.databases.list_documents(
|
| 78 |
+
database_id=settings.APPWRITE_DATABASE_ID,
|
| 79 |
+
collection_id=settings.APPWRITE_COLLECTION_ID,
|
| 80 |
+
queries=[
|
| 81 |
+
Query.less_than('published_at', cutoff_48h),
|
| 82 |
+
Query.limit(1)
|
| 83 |
+
]
|
| 84 |
+
)
|
| 85 |
+
old_articles = response_48h['total']
|
| 86 |
+
|
| 87 |
+
# Articles from last 24 hours (recent)
|
| 88 |
+
cutoff_24h = (datetime.now() - timedelta(hours=24)).isoformat()
|
| 89 |
+
response_24h = appwrite_db.databases.list_documents(
|
| 90 |
+
database_id=settings.APPWRITE_DATABASE_ID,
|
| 91 |
+
collection_id=settings.APPWRITE_COLLECTION_ID,
|
| 92 |
+
queries=[
|
| 93 |
+
Query.greater_than('published_at', cutoff_24h),
|
| 94 |
+
Query.limit(1)
|
| 95 |
+
]
|
| 96 |
+
)
|
| 97 |
+
recent_articles = response_24h['total']
|
| 98 |
+
|
| 99 |
+
print(f"📅 Articles older than 48 hours: {old_articles:,}")
|
| 100 |
+
print(f" ⚠️ These SHOULD be cleaned up by the scheduler")
|
| 101 |
+
print("")
|
| 102 |
+
print(f"π Articles from last 24 hours: {recent_articles:,}")
|
| 103 |
+
print(f" β These are fresh articles")
|
| 104 |
+
print("")
|
| 105 |
+
|
| 106 |
+
if old_articles > 0:
|
| 107 |
+
print("⚠️ WARNING: Old articles detected!")
|
| 108 |
+
print(f" The scheduler should delete {min(old_articles, 500)} articles on next run")
|
| 109 |
+
print("")
|
| 110 |
+
else:
|
| 111 |
+
print("✅ GOOD: No articles older than 48 hours found")
|
| 112 |
+
print(" Cleanup scheduler is working correctly!")
|
| 113 |
+
print("")
|
| 114 |
+
|
| 115 |
+
except Exception as e:
|
| 116 |
+
print(f"β Error analyzing article age: {e}")
|
| 117 |
+
print("")
|
| 118 |
+
|
| 119 |
+
# ===== PART 5: Scheduler Status =====
|
| 120 |
+
print("β" * 80)
|
| 121 |
+
print("⏰ PART 5: SCHEDULER STATUS")
|
| 122 |
+
print("β" * 80)
|
| 123 |
+
|
| 124 |
+
print(f"π Scheduler Running: {scheduler.running}")
|
| 125 |
+
print("")
|
| 126 |
+
|
| 127 |
+
jobs = scheduler.get_jobs()
|
| 128 |
+
if jobs:
|
| 129 |
+
print(f"π Registered Jobs: {len(jobs)}")
|
| 130 |
+
print("")
|
| 131 |
+
for job in jobs:
|
| 132 |
+
print(f" 🔹 {job.name}")
|
| 133 |
+
print(f" ID: {job.id}")
|
| 134 |
+
print(f" Next Run: {job.next_run_time}")
|
| 135 |
+
print(f" Trigger: {job.trigger}")
|
| 136 |
+
print("")
|
| 137 |
+
else:
|
| 138 |
+
print("⚠️ No jobs registered")
|
| 139 |
+
print("💡 The scheduler might not have started yet")
|
| 140 |
+
print("")
|
| 141 |
+
|
| 142 |
+
# ===== PART 6: Cleanup Efficiency =====
|
| 143 |
+
print("β" * 80)
|
| 144 |
+
print("🧹 PART 6: CLEANUP SCHEDULER EFFICIENCY")
|
| 145 |
+
print("β" * 80)
|
| 146 |
+
|
| 147 |
+
print("π Cleanup Configuration:")
|
| 148 |
+
print(f" • Retention Policy: 48 hours (articles older than this are deleted)")
|
| 149 |
+
print(f" • Cleanup Frequency: Every 6 hours (00:00, 06:00, 12:00, 18:00 UTC)")
|
| 150 |
+
print(f" • Cleanup Capacity: 500 articles per run")
|
| 151 |
+
print(f" • Daily Cleanup Limit: 2,000 articles/day")
|
| 152 |
+
print("")
|
| 153 |
+
|
| 154 |
+
if old_articles > 0:
|
| 155 |
+
days_to_clear = (old_articles / 2000)
|
| 156 |
+
print(f"⏳ Estimated time to clear {old_articles:,} old articles:")
|
| 157 |
+
print(f" {days_to_clear:.1f} days at current cleanup rate")
|
| 158 |
+
print("")
|
| 159 |
+
|
| 160 |
+
if days_to_clear > 3:
|
| 161 |
+
print("⚠️ WARNING: Cleanup is falling behind!")
|
| 162 |
+
print("💡 Recommendations:")
|
| 163 |
+
print(" 1. Run manual cleanup: python test_cleanup.py")
|
| 164 |
+
print(" 2. Consider increasing cleanup limit in scheduler.py")
|
| 165 |
+
print(" 3. Reduce article retention to 24 hours instead of 48")
|
| 166 |
+
print("")
|
| 167 |
+
|
| 168 |
+
# ===== PART 7: Why Platform Shows Fewer Articles =====
|
| 169 |
+
print("β" * 80)
|
| 170 |
+
print("π PART 7: WHY PLATFORM SHOWS FEWER ARTICLES")
|
| 171 |
+
print("β" * 80)
|
| 172 |
+
|
| 173 |
+
print(f"💡 Your database has {total_articles:,} articles")
|
| 174 |
+
print(f" BUT your API is configured to show only 20 articles per category")
|
| 175 |
+
print("")
|
| 176 |
+
print("π Location: app/routes/news.py:49")
|
| 177 |
+
print(" Code: db_articles = await appwrite_db.get_articles(category, limit=20)")
|
| 178 |
+
print("")
|
| 179 |
+
print("This is INTENTIONAL for:")
|
| 180 |
+
print(" β Fast response times")
|
| 181 |
+
print(" β Better user experience")
|
| 182 |
+
print(" β Reduced bandwidth usage")
|
| 183 |
+
print("")
|
| 184 |
+
print("If you want to show more articles, you can:")
|
| 185 |
+
print(" 1. Increase the limit parameter in the API")
|
| 186 |
+
print(" 2. Implement pagination to load more articles on demand")
|
| 187 |
+
print("")
|
| 188 |
+
|
| 189 |
+
print("=" * 80)
|
| 190 |
+
print("✅ REPORT COMPLETE")
|
| 191 |
+
print("=" * 80)
|
| 192 |
+
|
| 193 |
+
if __name__ == "__main__":
|
| 194 |
+
asyncio.run(main())
|